# Purpose of Notebook

This is the current final version of the linear regression machine learning model for the Data Miners Group EV Registration Analysis.

The linear regression model can generate predictions based on the available target and features in the dataset generated with the data_miners_preprocessing_AWS notebook.

The User Interactive section of the notebook allows a user to select a feature of the dataset and enter a modification value that is added to that feature. The code will export a csv with the modified data and present a dataframe for inspection.

# Initial Imports & Load CSV

In [1]:
# Initial imports
import pandas as pd
import sklearn.linear_model

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# location of the preprocessed CSV that feeds the model
model_data = '/content/model_df.csv'

# Activate Functions

In [2]:
# function to turn csv data to dataframe
def clean_set(data):
  """Load the model CSV and return a dataframe indexed by 5-digit ZIP code.

  Parameters
  ----------
  data : str or file-like
      Path (or readable buffer) of a CSV containing a ZIP_CODE column and
      every column named in the final column selection below, except
      EV_TOTAL, which is created here.

  Returns
  -------
  pandas.DataFrame
      Indexed by ZIP_CODE (zero-padded strings), with an added EV_TOTAL
      column and the columns in a fixed order, REG_COUNTS last.

  Raises
  ------
  KeyError
      If one of the required columns is missing from the CSV.
  """
  # read ZIP codes as text so they are never parsed as numbers; padding is
  # then a plain string operation whatever the rest of the column looks like
  data_df = pd.read_csv(data, dtype={'ZIP_CODE': str})

  # prepare zip code data to appropriate 5 digit format (left-pad with zeros)
  data_df['ZIP_CODE'] = data_df['ZIP_CODE'].str.zfill(5)

  # set ZIP_CODE as index
  data_df = data_df.set_index('ZIP_CODE')

  # create an EV_TOTAL column; the source columns are picked by name so the
  # result does not depend on the column order of the CSV
  # NOTE(review): EV_FAST is left out of the sum to keep the values this
  # notebook has produced so far - confirm whether fast chargers should count.
  data_df['EV_TOTAL'] = data_df[['EV_LEVEL_1', 'EV_LEVEL_2']].sum(axis=1)

  # Order columns
  data_df = data_df[['EV_LEVEL_1', 'EV_LEVEL_2', 'EV_FAST', 'EV_TOTAL',
               'NETWORK_TYPE_ChargePoint', 'NETWORK_TYPE_Non_Networked', 'NETWORK_TYPE_Other_Network',
               'TOTAL_HOUSEHOLDS', 'MEDIAN_INCOME',
               'REG_COUNTS']]

  # return the data frame
  return data_df


# function to get target and features
def target_feature(dataframe):
  """Split a prepared dataframe into model features and target.

  Parameters
  ----------
  dataframe : pandas.DataFrame
      Must contain REG_COUNTS and the nine feature columns listed below.

  Returns
  -------
  tuple
      (X, y): X is the dataframe restricted to the feature columns, in the
      order listed below; y is the REG_COUNTS column.
  """
  # the target is the registration count column
  target = dataframe.REG_COUNTS

  # every other model column is used as a feature
  feature_columns = [
    'EV_LEVEL_1', 'EV_LEVEL_2', 'EV_FAST', 'EV_TOTAL',
    'NETWORK_TYPE_ChargePoint', 'NETWORK_TYPE_Non_Networked', 'NETWORK_TYPE_Other_Network',
    'TOTAL_HOUSEHOLDS', 'MEDIAN_INCOME',
  ]
  features = dataframe[feature_columns]

  # features first, target second
  return features, target

# function to generate predictions from dataset
def predict(in_scaler, out_scaler, orig_df, ml_model, data_X):
  """Predict the target for data_X and attach the result to orig_df.

  Parameters
  ----------
  in_scaler : fitted scaler
      Its transform() is applied to data_X before predicting.
  out_scaler : fitted scaler
      Its inverse_transform() turns scaled predictions back into
      un-scaled values.
  orig_df : pandas.DataFrame
      Frame the predictions are merged into; must carry ZIP_CODE as an
      index level or column.
  ml_model : fitted model
      Object exposing predict().
  data_X : pandas.DataFrame
      Feature data; its index labels the predictions.

  Returns
  -------
  pandas.DataFrame
      orig_df left-merged on ZIP_CODE with a new REG_PREDICT column.
  """
  # transform feature data
  data_scaled = in_scaler.transform(data_X)

  # make predictions with scaled feature data
  pred_scaled = ml_model.predict(data_scaled)

  # a model fitted on a 1-D target returns 1-D predictions, while a scaler
  # fitted on a single column works on 2-D input; present them as one column
  if pred_scaled.ndim == 1:
    pred_scaled = pred_scaled.reshape(-1, 1)

  # inverse transform prediction data to un-scaled format
  pred = out_scaler.inverse_transform(pred_scaled)

  # create a data frame with prediction data
  pred_df = pd.DataFrame(pred, index=data_X.index, columns=['REG_PREDICT'])

  # merge original data frame with prediction data
  pred_out_df = orig_df.merge(pred_df, how='left', on='ZIP_CODE')

  # return merged dataframe
  return pred_out_df

# Create dataframe for ML Model

In [3]:
# build the modelling dataframe from the CSV at model_data: clean_set indexes
# it by ZIP_CODE, adds EV_TOTAL and puts the columns in a fixed order
model_df = clean_set(model_data)

# Train-Test-Split

In [4]:
# separate the prepared dataframe into feature matrix and target vector
model_X, model_y = target_feature(model_df)

# hold out 33% of the rows for testing; the fixed random_state makes the
# split reproducible from run to run
X_train, X_test, y_train, y_test = train_test_split(
    model_X,
    model_y,
    test_size=0.33,
    random_state=42,
)

# Scaling

In [5]:
# Separate scalers for features and target. The target scaler is kept so
# scaled predictions can later be inverse-transformed into usable values.
# Both scalers are fitted on the training split only.
input_scaler = StandardScaler().fit(X_train)
X_train_scaled = input_scaler.transform(X_train)
X_test_scaled = input_scaler.transform(X_test)

# the target is reshaped to a single column before scaling
output_scaler = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = output_scaler.transform(y_train.values.reshape(-1, 1))
y_test_scaled = output_scaler.transform(y_test.values.reshape(-1, 1))

# Linear Regression

In [6]:
# Train the model using the scaled training features and scaled training target.
# fit() is left as the last expression so the fitted estimator is displayed
# as the cell output.
model = LinearRegression()
model.fit(X_train_scaled, y_train_scaled)

LinearRegression()

# Export Predictions

In [7]:
# run the fitted model over the full feature set (training and test rows)
# and attach the predictions to the modelling dataframe
model_predictions_df = predict(
    in_scaler=input_scaler,
    out_scaler=output_scaler,
    orig_df=model_df,
    ml_model=model,
    data_X=model_X,
)

# export model_predictions_df for Tableau.
model_predictions_df.to_csv('pred_out_df.csv')

# User Interactive

In [8]:
# create a copy of the model_df dataframe so model_df itself is not modified
test_df = model_df.copy()

# input functions to gather data from user.
# strip() drops stray spaces typed around the feature name so it still
# matches a column label.
select_feature = input("\n Enter a feature from your dataset: \n EV_LEVEL_1, EV_LEVEL_2, EV_FAST, EV_TOTAL, \n NETWORK_TYPE_ChargePoint, NETWORK_TYPE_Non_Networked, NETWORK_TYPE_Other_Network, \n TOTAL_HOUSEHOLDS, MEDIAN_INCOME \n\n ").strip()
modifier_input = input("Modify feature by what (whole number)? \n")

# convert input data to int format for processing
# (int() raises ValueError if the entry is not a whole number)
modifier = int(modifier_input)

# fail early with a readable message when the name is not a column
if select_feature not in test_df.columns:
    raise ValueError(
        f"Unknown feature {select_feature!r}; choose one of: {', '.join(test_df.columns)}"
    )

# add the modifier to every row of the selected feature in one vectorized step
# NOTE(review): EV_TOTAL is not recomputed here, so changing EV_LEVEL_1 or
# EV_LEVEL_2 leaves EV_TOTAL at its old value - confirm this is intended.
test_df[select_feature] = test_df[select_feature] + modifier

# generate target and feature data to make prediction
pred_X, pred_y = target_feature(test_df)

# use predict function to return dataframe with user input data
feature_pred_df = predict(input_scaler, output_scaler, test_df, model, pred_X)

# create a new column with the difference between REG_PREDICT and REG_COUNTS
# NOTE(review): this is the prediction minus the actual count, so it also
# contains the model's error on the unmodified data, not only the effect of
# the modifier - confirm that is what the visualization expects.
feature_pred_df['TARGET_CHANGE'] = feature_pred_df['REG_PREDICT'] - feature_pred_df['REG_COUNTS']

# create a filename based on user input data
filename = (select_feature + "_" + str(modifier))

# create a csv filename for exporting csv with descriptive filename
# that reflects the user input data.
csv_name = "pred_%s.csv" % filename

# export csv for use in Tableau to visualize data.
feature_pred_df.to_csv(csv_name)

# display dataframe for inspection.
feature_pred_df


 Enter a feature from your dataset: 
 EV_LEVEL_1, EV_LEVEL_2, EV_FAST, EV_TOTAL, 
 NETWORK_TYPE_ChargePoint, NETWORK_TYPE_Non_Networked, NETWORK_TYPE_Other_Network, 
 TOTAL_HOUSEHOLDS, MEDIAN_INCOME 

 EV_FAST
Modify feature by what (whole number)? 
1


Unnamed: 0_level_0,EV_LEVEL_1,EV_LEVEL_2,EV_FAST,EV_TOTAL,NETWORK_TYPE_ChargePoint,NETWORK_TYPE_Non_Networked,NETWORK_TYPE_Other_Network,TOTAL_HOUSEHOLDS,MEDIAN_INCOME,REG_COUNTS,REG_PREDICT,TARGET_CHANGE
ZIP_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
03266,0,2,1,2,0,1,0,851.0,54904.0,0,-298.358067,-298.358067
07001,0,1,1,1,0,1,0,5022.0,69849.0,173,315.001494,142.001494
07003,0,12,6,12,4,1,3,18577.0,78034.0,800,2132.050752,1332.050752
07004,0,6,4,6,0,1,2,2621.0,122063.0,308,1545.683982,1237.683982
07005,0,1,11,1,0,1,1,5489.0,111946.0,446,1458.738336,1012.738336
...,...,...,...,...,...,...,...,...,...,...,...,...
99352,0,13,4,13,1,1,4,12877.0,81410.0,3413,1621.934183,-1791.065817
99354,36,48,1,84,0,45,1,9175.0,73369.0,2082,1000.749742,-1081.250258
99361,0,1,1,1,0,0,1,750.0,69118.0,100,169.792598,69.792598
99362,1,27,1,28,1,3,12,15717.0,56665.0,2054,2357.427031,303.427031
