In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
import time
import seaborn as sns

pd.set_option('display.max_colwidth', 3000)
pd.set_option("display.max_rows", 100000)

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [5]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [6]:
def model_evaluator(y_truth, y_preds):
    mse = mean_squared_error(y_truth, y_preds)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_truth, y_preds)
    r2 = r2_score(y_truth, y_preds)
    
    print("Mean Squared Error: {:.3f}".format(mse))
    print("Root Mean Squared Error: {:.3f}".format(rmse))
    print("Mean Absolute Error: {:.3f}".format(mae))
    print("R-Squared Score: {:.3f}".format(r2))
    
    return mse, rmse, mae, r2

In [7]:
def save_model(model, filepath):
    with open(filepath, "wb") as f:
        pickle.dump(model, f)
        
def load_model(filepath):
    with open(filepath, "rb") as f:
        return pickle.load(f)

In [8]:
print("Reading Features Data from Disk...")
_dir = r"E:\ARSH\NEU\Fall 2021\DS 5500\Project\Data\Features_From_CNN"
test_filepath = r"{}\df_test_densenet161_hurricane_features.csv".format(_dir)
train_filepath = r"{}\df_train_densenet161_hurricane_features.csv".format(_dir)
train_df = pd.read_csv(train_filepath, index_col=0)
test_df = pd.read_csv(test_filepath, index_col=0)
print("Done!")

Reading Features Data from Disk...
Done!


In [9]:
print(train_df.shape, test_df.shape)

(61637, 2355) (37913, 2355)


In [10]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2345,2346,2347,2348,2349,2350,2351,storm_id,image_list,wind_speed
0,-0.00355,-0.007286,-0.009102,-0.0094,-0.002762,-0.002491,0.001555,-0.007246,-0.015684,-0.013834,...,-0.002486,-0.00382,-0.004796,-0.004386,-0.004609,-0.004294,-0.001626,abs,"['/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_000/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_009/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_018/image.jpg']",64
1,0.001127,0.004741,0.010198,0.007915,0.000789,-0.003223,-0.002085,0.001353,0.006295,0.010862,...,-4.2e-05,0.003036,0.005135,0.006542,0.004115,0.000852,0.000636,abs,"['/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_001/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_010/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_019/image.jpg']",64
2,0.011335,0.009224,0.00751,-0.00541,-0.006022,-0.008802,-0.00264,0.007572,0.005992,0.009079,...,0.001811,0.001397,0.000819,-0.002579,-0.005391,-0.004161,-0.003541,abs,"['/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_002/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_011/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_020/image.jpg']",64
3,0.000995,0.001349,-0.001129,-0.002661,-0.003483,-0.004381,-0.003357,0.004359,0.000799,-0.005693,...,0.000126,-0.001589,-0.00427,-0.006984,-0.006126,-0.000702,0.000676,abs,"['/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_003/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_012/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_021/image.jpg']",63
4,-0.008107,-0.013242,-0.017639,-0.011723,-0.006891,-0.003026,-0.001242,-0.011085,-0.026543,-0.030342,...,-0.001443,-0.001076,-0.001096,-0.002821,-0.001991,-0.003359,-0.001408,abs,"['/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_004/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_013/image.jpg', '/content//nasa_tropical_storm_competition_train_source/nasa_tropical_storm_competition_train_source_abs_022/image.jpg']",62


In [11]:
test_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2345,2346,2347,2348,2349,2350,2351,storm_id,image_list,wind_speed
0,-0.00116,-0.00112,-0.000784,-0.001614,-0.000984,-0.005864,-0.002829,-0.00214,-0.005242,-0.007274,...,0.001568,0.002081,0.003531,0.001224,0.000862,0.001087,0.00197,acd,"['/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_123/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_132/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_141/image.jpg']",30
1,-8.9e-05,-0.001554,0.004143,0.002703,0.005077,0.000657,0.000336,-0.002449,-0.001684,0.000346,...,0.000222,-0.002708,-0.006217,-0.005604,-0.004774,-0.00027,0.00029,acd,"['/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_124/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_133/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_142/image.jpg']",30
2,-0.00536,-0.009921,-0.010257,-0.008396,0.002953,0.004118,0.006383,-0.009008,-0.011066,-0.012372,...,-0.005381,-0.011197,-0.013254,-0.011409,-0.004929,-0.002992,-0.000493,acd,"['/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_125/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_134/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_143/image.jpg']",30
3,-0.005878,-0.006456,-0.001434,-0.00263,0.000808,-0.002334,0.000972,-0.00964,-0.014201,-0.009425,...,-0.000342,-0.000646,0.000306,-0.001485,-0.000177,-0.000486,0.000478,acd,"['/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_126/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_135/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_144/image.jpg']",30
4,0.00064,0.005447,0.010191,0.011924,0.00419,0.001045,-0.0025,0.003412,0.008251,0.010454,...,-0.001428,-0.001918,-0.002292,-0.002038,-0.001947,0.000368,0.000483,acd,"['/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_127/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_136/image.jpg', '/content//nasa_tropical_storm_competition_test_source/nasa_tropical_storm_competition_test_source_acd_145/image.jpg']",30


In [12]:
feature_size = 2352 - 1 # number of features extracted from densenet161 - 1
X_train, y_train = train_df.iloc[:, :feature_size], train_df["wind_speed"]
X_test, y_test = test_df.iloc[:, :feature_size], test_df["wind_speed"]

# 1. Linear Regression

In [13]:
print("Fitting Linear Regression Model...")
start = time.time()
linearRegression = LinearRegression()
linearRegression.fit(X_train, y_train)
print("Done!")
print("Time Taken: {:.5f}".format(time.time()-start))

Fitting Linear Regression Model...
Done!
Time Taken: 107.16174


In [15]:
model_name = "linear_regression"
filepath = r"{}\Trained Models\{}.sav".format(_dir, model_name)
save_model(linearRegression, filepath)

In [24]:
print("Evaluating Model on Train Set...")
y_preds = linearRegression.predict(X_train)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_train, y_preds)
print("")
print("Done!")

Evaluating Model on Train Set...
Computing Metrics...

Mean Squared Error: 712.892
Root Mean Squared Error: 26.700
Mean Absolute Error: 21.035
R-Squared Score: 0.037

Done!


In [25]:
print("Evaluating Model on Test Set...")
y_preds = linearRegression.predict(X_test)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_test, y_preds)
print("")
print("Done!")

Evaluating Model on Test Set...
Computing Metrics...

Mean Squared Error: 665.048
Root Mean Squared Error: 25.789
Mean Absolute Error: 21.030
R-Squared Score: -0.086

Done!


# 2. Rigde Regression

In [26]:
print("Fitting Ridge Regression Model...")
start = time.time()
ridge = Ridge()
ridge.fit(X_train, y_train)
print("Done!")
print("Time Taken: {:.5f}".format(time.time()-start))

Fitting Ridge Regression Model...
Done!
Time Taken: 8.47800


In [27]:
model_name = "ridge"
filepath = r"{}\Trained Models\{}.sav".format(_dir, model_name)
save_model(ridge, filepath)

In [28]:
print("Evaluating Model on Train Set...")
y_preds = ridge.predict(X_train)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_train, y_preds)
print("")
print("Done!")

Evaluating Model on Train Set...
Computing Metrics...

Mean Squared Error: 729.094
Root Mean Squared Error: 27.002
Mean Absolute Error: 21.281
R-Squared Score: 0.015

Done!


In [29]:
print("Evaluating Model on Test Set...")
y_preds = ridge.predict(X_test)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_test, y_preds)
print("")
print("Done!")

Evaluating Model on Test Set...
Computing Metrics...

Mean Squared Error: 643.205
Root Mean Squared Error: 25.361
Mean Absolute Error: 20.766
R-Squared Score: -0.051

Done!


# 3. Lasso Regression

In [30]:
print("Fitting Lasso Regression Model...")
start = time.time()
lasso = Lasso()
lasso.fit(X_train, y_train)
print("Done!")
print("Time Taken: {:.5f}".format(time.time()-start))

Fitting Lasso Regression Model...
Done!
Time Taken: 3.37200


In [31]:
model_name = "lasso"
filepath = r"{}\Trained Models\{}.sav".format(_dir, model_name)
save_model(lasso, filepath)

In [32]:
print("Evaluating Model on Train Set...")
y_preds = lasso.predict(X_train)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_train, y_preds)
print("")
print("Done!")

Evaluating Model on Train Set...
Computing Metrics...

Mean Squared Error: 740.181
Root Mean Squared Error: 27.206
Mean Absolute Error: 21.457
R-Squared Score: 0.000

Done!


In [33]:
print("Evaluating Model on Test Set...")
y_preds = lasso.predict(X_test)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_test, y_preds)
print("")
print("Done!")

Evaluating Model on Test Set...
Computing Metrics...

Mean Squared Error: 638.546
Root Mean Squared Error: 25.269
Mean Absolute Error: 20.718
R-Squared Score: -0.043

Done!


# 4. ElasticNet Regression

In [34]:
print("Fitting ElasticNet Regression Model...")
start = time.time()
elastic_net = ElasticNet()
elastic_net.fit(X_train, y_train)
print("Done!")
print("Time Taken: {:.5f}".format(time.time()-start))

Fitting ElasticNet Regression Model...
Done!
Time Taken: 2.79999


In [35]:
model_name = "elastic_net"
filepath = r"{}\Trained Models\{}.sav".format(_dir, model_name)
save_model(elastic_net, filepath)

In [36]:
print("Evaluating Model on Train Set...")
y_preds = elastic_net.predict(X_train)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_train, y_preds)
print("")
print("Done!")

Evaluating Model on Train Set...
Computing Metrics...

Mean Squared Error: 740.181
Root Mean Squared Error: 27.206
Mean Absolute Error: 21.457
R-Squared Score: 0.000

Done!


In [37]:
print("Evaluating Model on Test Set...")
y_preds = elastic_net.predict(X_test)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_test, y_preds)
print("")
print("Done!")

Evaluating Model on Test Set...
Computing Metrics...

Mean Squared Error: 638.546
Root Mean Squared Error: 25.269
Mean Absolute Error: 20.718
R-Squared Score: -0.043

Done!


# 5. Decision Tree

In [20]:
print("Fitting Decision Tree Regressor...")
start = time.time()
dtree = DecisionTreeRegressor(min_samples_split=150, min_samples_leaf=100, max_depth=50, random_state=7)
dtree.fit(X_train, y_train)
print("Done!")
print("Time Taken: {:.5f}".format(time.time()-start))

Fitting Decision Tree Regressor...
Done!
Time Taken: 378.49815


In [21]:
model_name = "dtree"
filepath = r"{}\Trained Models\{}.sav".format(_dir, model_name)
save_model(dtree, filepath)

In [22]:
print("Evaluating Model on Train Set...")
y_preds = dtree.predict(X_train)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_train, y_preds)
print("")
print("Done!")

Evaluating Model on Train Set...
Computing Metrics...

Mean Squared Error: 652.155
Root Mean Squared Error: 25.537
Mean Absolute Error: 19.974
R-Squared Score: 0.119

Done!


In [23]:
print("Evaluating Model on Test Set...")
y_preds = dtree.predict(X_test)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_test, y_preds)
print("")
print("Done!")

Evaluating Model on Test Set...
Computing Metrics...

Mean Squared Error: 728.642
Root Mean Squared Error: 26.993
Mean Absolute Error: 21.853
R-Squared Score: -0.190

Done!


# 6. Random Forest

In [25]:
print("Fitting Randon Forest Regressor...")
start = time.time()
random_forest = RandomForestRegressor(min_samples_split=150, min_samples_leaf=50,
                                      n_estimators=300, max_depth=50, random_state=7, verbose=1)
random_forest.fit(X_train, y_train)
print("Done!")
print("Time Taken: {:.5f}".format(time.time()-start))

Fitting Randon Forest Regressor...


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Done!
Time Taken: 69614.27211


[Parallel(n_jobs=1)]: Done 300 out of 300 | elapsed: 1160.2min finished


In [26]:
model_name = "random_forest"
filepath = r"{}\Trained Models\{}.sav".format(_dir, model_name)
save_model(random_forest, filepath)

In [27]:
print("Evaluating Model on Train Set...")
y_preds = random_forest.predict(X_train)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_train, y_preds)
print("")
print("Done!")

Evaluating Model on Train Set...


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Computing Metrics...

Mean Squared Error: 596.880
Root Mean Squared Error: 24.431
Mean Absolute Error: 19.185
R-Squared Score: 0.194

Done!


[Parallel(n_jobs=1)]: Done 300 out of 300 | elapsed:    5.5s finished


In [28]:
print("Evaluating Model on Test Set...")
y_preds = random_forest.predict(X_test)
print("Computing Metrics...\n")
mse, rmse, mae, r2 = model_evaluator(y_test, y_preds)
print("")
print("Done!")

Evaluating Model on Test Set...


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Computing Metrics...

Mean Squared Error: 642.071
Root Mean Squared Error: 25.339
Mean Absolute Error: 20.787
R-Squared Score: -0.049

Done!


[Parallel(n_jobs=1)]: Done 300 out of 300 | elapsed:    4.1s finished


In [39]:
from sklearn.ensemble import BaggingRegressor

In [None]:
print("Fitting Linear BaggingRegressor Model...")
start = time.time()
base_estimator = LinearRegression()
linear_bagging_regressor = BaggingRegressor(base_estimator=base_estimator, random_state=7, max_samples=1.0,
                                            max_features=1.0, n_estimators=2)
linear_bagging_regressor.fit(X_train, y_train)
print("Done!")
print("Time Taken: {:.5f}".format(time.time()-start))

Fitting Linear BaggingRegressor Model...
