In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Read states_energy_merged.csv (a relative path, so it is resolved against the
# notebook's working directory) into a DataFrame and preview the first rows.
pred_states_df = pd.read_csv("states_energy_merged.csv")
pred_states_df.head()

In [None]:
# Drop the consumption column.
# errors='ignore' keeps this cell re-runnable: the frame is reassigned to its
# own name, so without it a second run raises KeyError because the column is
# already gone.
pred_states_df = pred_states_df.drop(columns=['consumption'], errors='ignore')

pred_states_df.head()

In [None]:
# Test frame for a single state: the Illinois rows from the year 2000 onward.
test_df = pred_states_df.query("Year >= 2000 and State == 'Illinois'")
display(test_df.head())

# Illinois production value recorded for 2020 (shown as the cell output).
test_df.loc[test_df['Year'] == 2020, 'production'].to_numpy()[0]

<h1>Simple Linear Regression Test for Illinois</h1>

In [None]:
# Illinois only: model 'production' as a function of 'Year'.
# There is a single predictor, so this is a simple linear regression.

# Feature and target, each reshaped into a single column of shape (n, 1).
X = test_df['Year'].to_numpy().reshape(-1, 1)
y = test_df['production'].to_numpy().reshape(-1, 1)

print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')

In [None]:
# Plot production against year for the Illinois subset.
# Labels and a title are set so the figure can be read on its own; the axis
# names are the source column names (units are not recorded in this notebook).
plt.scatter(X, y)
plt.xlabel('Year')
plt.ylabel('production')
plt.title('Illinois production by year (2000 onward)');

In [None]:
# Split X and y into train and test sets. No test_size is passed, so
# train_test_split's default proportion applies; random_state=42 pins the
# shuffle so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Create a Linear Regression (LR) model and fit it on the training split only;
# the test split is held back for the scoring cells that follow.

model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the split it was trained on and on the held-out split.

trng_score = model.score(X_train, y_train)
testing_score = model.score(X_test, y_test)

print(f"Training Score: {trng_score}")
# Report the held-out score here; this line previously printed the training
# score a second time under the "Testing Score" label.
print(f"Testing Score: {testing_score}")

In [None]:
# Use the fitted model to predict the target for the held-out test rows

predicted = model.predict(X_test)

In [None]:
# Score the held-out predictions with MSE and R2
mse = mean_squared_error(y_test, predicted)
r2 = r2_score(y_test, predicted)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R2): {r2}")


# This is a regression, so the outputs are continuous values, not classes.
predictions = model.predict(X_test[:5])
print(f"Predicted values: {predictions}")

# Extrapolate the fitted trend to future years. Each call takes one row with one
# feature and returns a 2-D array, hence the [0][0] indexing when printing.
n_2025 = model.predict([[2025]])
n_2030 = model.predict([[2030]])
n_2035 = model.predict([[2035]])

# Last observed value, printed for comparison with the forecasts.
print()
print(f"Illinois Renewable Energy Production in 2020: {int(test_df.loc[test_df['Year'] == 2020, 'production'].values[0])}")

# One blank line before each forecast, truncated to an integer for display.
for year, forecast in ((2025, n_2025), (2030, n_2030), (2035, n_2035)):
    print()
    print(f'Prediction for {year}: {int(forecast[0][0])}')

<h1>Finding the Best R-Squared (R2) Scores</h1>

<h1>Current-Dollar Gross Domestic Product (GDP)</h1>

<h1>Population</h1>

<h1>Energy Price</h1>

<h1>Multiple Linear Regression to Predict State Energy Difference</h1>

In [None]:
# Re-read the CSV so the frame starts again from the file's contents, including
# the 'consumption' column that an earlier cell dropped from pred_states_df.
pred_states_df = pd.read_csv("states_energy_merged.csv")


In [None]:
# Add a 'difference' column: production minus consumption, row by row.
pred_states_df['difference'] = pred_states_df["production"]- pred_states_df["consumption"]

In [None]:
# 'consumption' is intentionally kept: the per-state forecasting loop further
# down fits a model on df['consumption'], so dropping the column here would
# make that loop fail with a KeyError.

pred_states_df.head()

In [None]:
def _forecast_by_year(state_df, target, years):
    """Fit a Year -> ``target`` linear trend and predict ``target`` for ``years``.

    Parameters
    ----------
    state_df : pandas.DataFrame
        Rows for a single state; must contain 'Year' and ``target`` columns.
    target : str
        Name of the column to model.
    years : sequence of int
        Years to predict for.

    Returns
    -------
    list of float
        One prediction per entry of ``years``, in the same order.
    """
    years_col = state_df['Year'].values.reshape(-1, 1)
    target_col = state_df[target].values.reshape(-1, 1)

    # Fit on the training part of the split only, with the same seed for every
    # target. Held-out scores used to be computed here and then discarded, so
    # they are no longer computed.
    years_fit, _, target_fit, _ = train_test_split(years_col, target_col, random_state=42)
    trend = LinearRegression().fit(years_fit, target_fit)

    # A single vectorised predict call. ravel() flattens the (n, 1) output so
    # tolist() yields plain floats; calling float() on a 2-D array is deprecated
    # in recent NumPy releases.
    future = np.asarray(years).reshape(-1, 1)
    return trend.predict(future).ravel().tolist()


# Every forecast list is built from this one range so all columns of a state's
# record have the same length (a DataFrame cannot be built from ragged lists).
forecast_years = list(range(2021, 2036))

# Output key in the state record -> source column that is modelled for it.
target_columns = {
    'produced_renewable': 'production',
    'population': 'population',
    'energy_price': 'price',
    'total_consumed': 'consumption',
    'difference': 'difference',
}

states = pred_states_df["State"].unique()

state_dicts = {}

for state in states:
    state_df = pred_states_df.loc[pred_states_df['State'] == state]

    # An earlier cell may have dropped 'consumption' after 'difference' was
    # derived as production - consumption; in that case recover it exactly
    # from the two remaining columns instead of failing with a KeyError.
    if 'consumption' not in state_df.columns:
        state_df = state_df.assign(
            consumption=state_df['production'] - state_df['difference']
        )

    # Each target gets its own model; no model is shared across targets or
    # reused from the earlier Illinois-only cells.
    record = {
        'state': [state] * len(forecast_years),
        'year': list(forecast_years),
    }
    for out_key, column in target_columns.items():
        record[out_key] = _forecast_by_year(state_df, column, forecast_years)

    state_dicts[state] = record

# Preview one state's forecasts rather than dumping the whole dictionary.
pd.DataFrame(state_dicts[states[0]]).head()

In [None]:
# Create Dictionary of DataFrames: one forecast frame per state
dfs = {state: pd.DataFrame(forecast) for state, forecast in state_dicts.items()}

# The forecast records use different keys than the historical columns. Rename
# them so forecast rows stack into the same columns as the historical rows;
# otherwise 'State'/'Year' would be NaN for every forecast row and the sort
# below would be meaningless. 'population' and 'difference' already match.
forecast_to_history = {
    'state': 'State',
    'year': 'Year',
    'produced_renewable': 'production',
    'energy_price': 'price',
    'total_consumed': 'consumption',
}
forecast_frames = [frame.rename(columns=forecast_to_history) for frame in dfs.values()]

# DataFrame.append was removed in pandas 2.0. A single concat replaces the
# row-by-row append loop and leaves pred_states_df itself unmodified, so this
# cell can be re-run safely.
pred_state_df = (
    pd.concat([pred_states_df, *forecast_frames], ignore_index=True)
    .sort_values(by=["State", "Year"])
)
pred_state_df.head()

In [None]:
# Wisconsin rows of the combined frame. The boolean mask is built from the same
# frame it indexes, and the result gets its own name so the feature array X
# used by the regression cells is not overwritten.
wisconsin_df = pred_state_df.loc[pred_state_df['State'] == "Wisconsin"]

# Plot the data
# NOTE(review): 'Total Consumed (Bil. Btu)' is not a column this notebook reads
# or creates; 'consumption' is the column used elsewhere here - confirm.
plt.scatter(wisconsin_df['Year'], wisconsin_df['consumption'])
plt.xlabel('Year')
plt.ylabel('consumption')
plt.title('Wisconsin consumption by year');

In [None]:
# Select and order the columns to keep, using the column names this notebook
# actually works with ('production', 'population', 'price', 'consumption',
# 'difference'); the previous display-style labels do not exist in the data.
# NOTE(review): assumes the combined frame pred_state_df is the one meant to be
# kept - confirm. The variable name is left unchanged.
pred_statess_df = pred_state_df[['State', 'Year', 'production', 'population', 'price',
                                 'consumption', 'difference']]

In [None]:
# Write the column-selected frame to CSV with one decimal place for floats.
# NOTE(review): the previous target was a placeholder name wrapped in literal
# quote characters, called on a frame that is not defined in this notebook.
# The file name below is a suggestion - confirm it, and that Resources/ exists.
pred_statess_df.to_csv("Resources/states_energy_predictions.csv", float_format='%.1f')