In [1]:
# Initial imports
# import numpy as np
import pandas as pd
import sklearn.linear_model

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Locations of the two input CSV files (absolute paths under /content,
# presumably a Google Colab working directory - adjust when running elsewhere).
# model_data feeds the training frame, pred_data the frame to predict for.
model_data = '/content/model_df.csv'
pred_data = '/content/for_predictions.csv'

In [2]:
def clean_set(data):
  """Load a CSV and return the cleaned frame used for modelling.

  Steps: read the file, normalise ZIP_CODE to a zero-padded string of at
  least five characters, use it as the index, add an EV_TOTAL column and keep
  only the modelling columns.

  Args:
    data: Path or file-like object accepted by ``pd.read_csv``. The file must
      contain a ZIP_CODE column plus every column selected below.

  Returns:
    DataFrame indexed by ZIP_CODE with the EV, network, household, income and
    REG_COUNTS columns (in that order).

  Raises:
    KeyError: if one of the expected columns is missing from the file.
  """
  # Read ZIP codes as text. Letting pandas infer the dtype only works while
  # every value is a plain integer; a single non-numeric entry (e.g. ZIP+4)
  # turns the column into strings, for which the old f'{x:05}' formatting
  # either raised or padded on the wrong side ('501' -> '50100').
  data_df = pd.read_csv(data, dtype={'ZIP_CODE': str})

  # Left-pad with zeros so codes that lost their leading zeros are restored.
  data_df['ZIP_CODE'] = data_df['ZIP_CODE'].str.zfill(5)

  # set ZIP_CODE as index
  data_df = data_df.set_index('ZIP_CODE')

  # NOTE(review): positional slice - sums the two columns sitting 9th and 8th
  # from the end of the frame. Which columns those are depends entirely on the
  # CSV layout; confirm it covers every charger-count column intended.
  data_df['EV_TOTAL'] = data_df.iloc[:, -9:-7].sum(axis=1)

  keep_columns = [
      'EV_LEVEL_1', 'EV_LEVEL_2', 'EV_FAST', 'EV_TOTAL',
      'NETWORK_TYPE_ChargePoint', 'NETWORK_TYPE_Non_Networked',
      'NETWORK_TYPE_Other_Network',
      'TOTAL_HOUSEHOLDS', 'MEDIAN_INCOME',
      'REG_COUNTS',
  ]
  data_df = data_df[keep_columns]

  return data_df

In [3]:
# Run both input files through clean_set: model_df comes from the model_data
# path, pred_df from the pred_data path.
model_df = clean_set(model_data)
pred_df = clean_set(pred_data)

# Train-Test-Split

In [4]:
def target_feature(dataframe):
  """Split a cleaned frame into its feature matrix and target series.

  Args:
    dataframe: DataFrame containing the nine feature columns listed below and
      the REG_COUNTS target column.

  Returns:
    Tuple ``(X, y)`` where ``X`` is a DataFrame of the feature columns and
    ``y`` is the REG_COUNTS Series, both taken from ``dataframe``.

  Raises:
    KeyError: if ``dataframe`` lacks REG_COUNTS or any feature column.
  """
  feature_columns = [
      'EV_LEVEL_1', 'EV_LEVEL_2', 'EV_FAST', 'EV_TOTAL',
      'NETWORK_TYPE_ChargePoint', 'NETWORK_TYPE_Non_Networked',
      'NETWORK_TYPE_Other_Network',
      'TOTAL_HOUSEHOLDS', 'MEDIAN_INCOME',
  ]

  # Read from the argument, not from the notebook-level model_df: otherwise
  # every call returns the modelling data regardless of what was passed in.
  y = dataframe['REG_COUNTS']
  X = dataframe[feature_columns]

  return (X, y)

In [5]:
# Unpack the (X, y) tuple returned by target_feature for the modelling frame.
model_X, model_y = target_feature(model_df)

# Same call for the prediction frame.
# NOTE(review): pred_y is not referenced again in the visible cells.
pred_X, pred_y = target_feature(pred_df)

In [6]:
# Partition the modelling data into training and testing subsets: 33% of the
# rows are held out for testing, and the fixed random_state makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    model_X,
    model_y,
    test_size=0.33,
    random_state=42,
)

# Scaling & Normalization

In [7]:
# Feature scaler: fitted on the training rows only, then applied unchanged to
# both the training and the testing features.
input_scaler = StandardScaler().fit(X_train)
X_train_scaled = input_scaler.transform(X_train)
X_test_scaled = input_scaler.transform(X_test)

# Target scaler: same procedure. The target Series is reshaped into a single
# column because the scaler works on 2-D input.
output_scaler = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = output_scaler.transform(y_train.values.reshape(-1, 1))
y_test_scaled = output_scaler.transform(y_test.values.reshape(-1, 1))

# Linear Regression

In [8]:
# Train the model using the training sets.
# Both inputs are the standardised arrays, so the fitted model predicts in
# scaled target units.
model = LinearRegression()
# Left as the cell's final expression so the notebook displays the estimator.
model.fit(X_train_scaled, y_train_scaled)

LinearRegression()

# Export Predictions

In [13]:
def predict(in_scaler, out_scaler, orig_df, ml_model, data_X):
  """Predict the target for ``data_X`` and append it to ``orig_df``.

  Args:
    in_scaler: Fitted transformer with a ``transform`` method, applied to the
      features before prediction.
    out_scaler: Fitted transformer with an ``inverse_transform`` method, used
      to bring predictions back to the original target units.
    orig_df: DataFrame to extend; must expose ZIP_CODE as a column or index
      level, since that is the merge key.
    ml_model: Fitted estimator with a ``predict`` method returning a NumPy
      array with one prediction per row.
    data_X: Feature DataFrame whose index supplies the ZIP_CODE keys for the
      predictions.

  Returns:
    ``orig_df`` left-merged with a new REG_PREDICT column on ZIP_CODE.
  """
  data_scaled = in_scaler.transform(data_X)

  pred_scaled = ml_model.predict(data_scaled)

  # A regressor fitted on a 1-D target returns 1-D predictions, while
  # inverse_transform needs a 2-D column. Reshaping is a no-op for the
  # (n, 1) output produced when the model was fitted on a column target.
  pred = out_scaler.inverse_transform(pred_scaled.reshape(-1, 1))

  # Reuse the feature index so each prediction keeps its ZIP_CODE key.
  pred_df = pd.DataFrame(pred, index=data_X.index, columns=['REG_PREDICT'])

  # Left merge: every row of orig_df is kept, unmatched rows get NaN.
  pred_out_df = orig_df.merge(pred_df, how='left', on='ZIP_CODE')

  return pred_out_df

In [14]:
# Predict for the modelling features and merge the result onto model_df.
model_predictions_df = predict(input_scaler, output_scaler, model_df, model, model_X)

# NOTE(review): disabled by the original author as "not working with current
# predict function". Before re-enabling, confirm that pred_X is really built
# from pred_df and that pred_df carries the ZIP_CODE key predict() merges on.
# pred_predictions_df = predict(input_scaler, output_scaler, pred_df, model, pred_X)

In [15]:
# Export model_predictions_df (features, REG_COUNTS and REG_PREDICT) as a CSV
# for Tableau. The relative path writes into the current working directory.
model_predictions_df.to_csv('pred_out_df.csv')