In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np

In [None]:
# Load the training data, discard incomplete rows and remove the
# 'Date of Joining' column in a single chained expression.
train_df = (
    pd.read_csv("data/train_new.csv")
    .dropna()
    .drop(columns=['Date of Joining'])
)

# Non-null count per remaining column
train_df.count()

In [None]:
# Display the cleaned training frame (null rows and 'Date of Joining' were removed when it was loaded).
train_df

In [None]:
# Feature matrix: every remaining column except the 'Burn Rate' target.
selected_features = train_df.drop(columns='Burn Rate')

selected_features

In [None]:
# Split into train and test partitions (fixed seed for a repeatable split)
from sklearn.model_selection import train_test_split

# NOTE(review): 'Burn Rate' is used both as the target and as the
# stratification labels; confirm that stratifying on it is intended.
X_train, X_test, y_train, y_test = train_test_split(
    selected_features,
    train_df['Burn Rate'],
    stratify=train_df['Burn Rate'],
    random_state=1,
)

In [None]:
# Inspect the held-out target values produced by the train/test split.
y_test

In [None]:
# Scale data: learn the min/max from the training features only, then
# apply the same transformation to both partitions.
from sklearn.preprocessing import MinMaxScaler

X_scaler = MinMaxScaler()
X_scaler.fit(X_train)

# NOTE(review): the scaled arrays are not referenced by the later cells
# visible in this notebook -- confirm whether they are still needed.
X_scale_train, X_scale_test = (X_scaler.transform(part) for part in (X_train, X_test))


In [None]:
# Create the (not yet fitted) linear regression estimator
from sklearn import preprocessing, utils
from sklearn.linear_model import LinearRegression

lin_regr_model = LinearRegression()

# Echo the estimator so the notebook shows its configuration
lin_regr_model


In [None]:
# Fit the linear regression on the unscaled training features.
# NOTE(review): X_scale_train from the scaling cell is not used here -- confirm this is intentional.

lin_regr_model.fit(X_train, y_train)

In [None]:
# Report the estimator's score() on the training and the held-out partitions.
print(
    f"Training Data Score: {lin_regr_model.score(X_train, y_train)}",
    f"Testing Data Score: {lin_regr_model.score(X_test, y_test)}",
    sep="\n",
)

In [None]:
# Predict the held-out partition and eyeball the first ten values
# next to the corresponding true targets.
predictions = lin_regr_model.predict(X_test)
print(
    f"First 10 Predictions:   {predictions[:10]}",
    f"First 10 Actual labels: {y_test.iloc[:10].tolist()}",
    sep="\n",
)

In [None]:
# Side-by-side table of predicted and true values on a fresh 0..n-1 index
# (the targets are passed as a plain array so no index alignment/reset is needed).
pd.DataFrame({"Prediction": predictions, "Actual": y_test.to_numpy()})

In [None]:
# SVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Standardisation and the SVR are chained into one estimator, so the
# scaler is fitted only on the data handed to fit() and the test set
# does not leak into training.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('svr', SVR()),
    ]
).fit(X_train, y_train)

# pipe.score(X_test, y_test)
pipe.predict(X_test)

In [None]:
#Random Forest Regressor

from sklearn.ensemble import RandomForestRegressor

# Seed the forest (same seed as the train/test split) so that a
# Restart & Run All reproduces the same model and the same scores.
rf_model = RandomForestRegressor(random_state=1)
rf_model.fit(X_train, y_train)

# NOTE: despite its name, train_pred_rf holds the score on the training
# partition, not predictions. The name is kept for backward compatibility.
train_pred_rf = rf_model.score(X_train, y_train)

# The training score alone is optimistic for a forest, so also report the
# score on the held-out partition.
test_score_rf = rf_model.score(X_test, y_test)

print (f'Score {train_pred_rf}')
print(f'Test score {test_score_rf}')


In [None]:
# Save the model
# Persists the fitted linear regression with joblib under a relative path,
# i.e. relative to the directory the notebook is run from.

from joblib import dump, load
dump(lin_regr_model, 'linear_regression_model.joblib') 

In [None]:
# Load the model back from the file written by the save cell.
# NOTE: joblib.load unpickles the file, so only load files you trust.
loaded_lin_model = load('linear_regression_model.joblib')

In [None]:
# Check that the reloaded model still scores on the held-out partition;
# the score is shown multiplied by 100 with two decimals.
score = loaded_lin_model.score(X_test, y_test)
print(f"Test score: {100 * score:.2f} %")



In [None]:
# New data to predict
pr = pd.read_csv('data/test_new.csv')

# Previously the file was loaded but the prediction still ran on X_test.
# Select the same feature columns (in the same order) the pipeline was
# fitted on, and drop incomplete rows as was done for the training data.
# NOTE(review): assumes test_new.csv contains every training feature column.
pr_features = pr[X_train.columns].dropna()

# apply the whole pipeline to the new data
pred = pipe.predict(pr_features)
print (pred)

In [None]:
# Try the pipeline on one hand-written sample.
# Values follow the column order documented in the "Order of Columns" notes of this notebook.
new_data = [
    1,    # Gender
    0,    # Company type
    1,    # WFH Setup
    3,    # Designation
    10,   # Resource Allocation
    8.2,  # Mental Fatigue
]

# predict() expects a 2-D input, hence the single-row wrapper list
pred = pipe.predict([new_data])
print(pred)

In [None]:
# Order of columns in a sample:
#   1. Gender              -- 0 = Male, 1 = Female
#   2. Company type        -- 0 = Service, 1 = Product
#   3. WFH Setup           -- 0 = No, 1 = Yes
#   4. Designation         -- position of the employee in the workplace
#                             (range 0-5); a higher number is a higher designation
#   5. Resource Allocation -- number of working hours (range 1-10)
#   6. Mental Fatigue      -- level of mental fatigue the employee is facing
#                             (range 0.0-10.0)

new_data = [
    0,    # Gender
    0,    # Company type
    1,    # WFH Setup
    3,    # Designation
    10,   # Resource Allocation
    8.2,  # Mental Fatigue
]

pred = pipe.predict([new_data])
print(f'Burn Rate: {pred}')

In [None]:
# Another hand-written sample, in the column order documented in the
# "Order of Columns" notes of this notebook.
new_data = [
    1,    # Gender
    1,    # Company type
    1,    # WFH Setup
    1,    # Designation
    3,    # Resource Allocation
    2.6,  # Mental Fatigue
]

pred = pipe.predict([new_data])
print(f'Burn Rate: {pred}')


In [158]:
# Hand-written sample, in the column order documented in the
# "Order of Columns" notes of this notebook.
new_data = [
    0,    # Gender
    1,    # Company type
    0,    # WFH Setup
    8,    # Designation -- NOTE(review): outside the 0-5 range given in the notes; confirm intended
    5,    # Resource Allocation
    6.7,  # Mental Fatigue
]

pred = pipe.predict([new_data])
print(f'Burn Rate: {pred}')

Burn Rate: [0.61577179]


In [None]:
# Hand-written sample, in the column order documented in the
# "Order of Columns" notes of this notebook.
new_data = [
    1,    # Gender
    1,    # Company type
    0,    # WFH Setup
    1,    # Designation
    1,    # Resource Allocation
    3.5,  # Mental Fatigue
]

pred = pipe.predict([new_data])
print(f'Burn Rate: {pred}')

In [None]:
print('Calculate your Burn Rate. Enter your info below')
print('* Your Burn Rate will be on a scale between 0-1')


def _ask_number(prompt, low, high, cast=int):
    """Prompt until the user enters a number within [low, high].

    Parameters
    ----------
    prompt : str
        Text shown to the user by input().
    low, high : int or float
        Inclusive bounds the answer must satisfy.
    cast : callable, default int
        Converter applied to the typed text (int for whole numbers,
        float for decimals). It must raise ValueError on bad input.

    Returns
    -------
    The converted value, guaranteed to lie within [low, high].
    """
    while True:
        raw = input(prompt).strip()
        try:
            value = cast(raw)
        except ValueError:
            print(f'"{raw}" is not a valid number, please try again.')
            continue
        # NaN fails both comparisons, so it is rejected here as well.
        if low <= value <= high:
            return value
        print(f'Please enter a value between {low} and {high}.')


# Each answer is converted to a number and range-checked before it reaches
# the model; previously the raw strings were passed to predict() unchecked.
gender = _ask_number('Enter your gender. 0 = male, 1 = female', 0, 1)
company_type = _ask_number('Enter your company type. 0 = service, 1 = product', 0, 1)
wfh = _ask_number('Do you have a "Work From Home" setup?. 0 = No, 1 = yes', 0, 1)
designation = _ask_number('Rank your position in your workplace. range 0-5. Low - High, whole numbers only', 0, 5)
resource_allocation = _ask_number('How many hours do you work in a day? range 0-10, whole numbers only', 0, 10)
mental_fatigue = _ask_number('Rank your mental fatigue. 0.0 - 10.0', 0.0, 10.0, cast=float)

# Same order as the answers were previously appended in.
user_input = [gender, company_type, wfh, designation, resource_allocation, mental_fatigue]

print(f'Your inputs: {user_input}')

# predict() expects a 2-D input, hence the single-row wrapper list
pred = pipe.predict([user_input])
print(f'Your Burn Rate is: {pred}')


Calculate your Burn Rate. Enter your info below
* Your Burn Rate will be on a scale between 0-1


In [157]:
# Re-run the prediction for the answers already stored in user_input, without prompting again.
pred = pipe.predict([user_input])
print(f'Your Burn Rate is: {pred}')

Your Burn Rate is: [0.3979046]
