In [1]:
# Initial imports
import pandas as pd
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.pipeline import Pipeline

In [2]:
# Loading data
file_path = Path("./ml_df.csv")
ml_df = pd.read_csv(file_path)
ml_df.head()

Unnamed: 0,dt,temp,temp_min,temp_max,humidity,snowfall
0,1990-01-01,32.98,32.98,32.98,95.0,1
1,1990-01-02,27.54,27.54,27.54,91.5,1
2,1990-01-03,30.13,30.13,30.13,91.08,1
3,1990-01-04,34.82,34.82,34.82,91.79,1
4,1990-01-05,38.99,38.99,38.99,94.33,0


In [3]:
# Drop columns that are not important features to the prediction.
ml_df = ml_df.drop(columns=['dt', 'temp_min', 'temp_max'])

In [4]:
# Define features set
X = ml_df.copy()
X = X.drop("snowfall", axis=1)
X.head()

Unnamed: 0,temp,humidity
0,32.98,95.0
1,27.54,91.5
2,30.13,91.08
3,34.82,91.79
4,38.99,94.33


In [5]:
# Define target vector
y = ml_df["snowfall"].values.reshape(-1, 1)
y[:5]

array([[1],
       [1],
       [1],
       [1],
       [0]])

In [6]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [8]:
# Create a Random Forest Classifier with Pipeline for the pkl file
rf_model = Pipeline([('scaler', StandardScaler()), ('forest', RandomForestClassifier())])


In [10]:
# Fitting the Model
rf_model = rf_model.fit(X_train, y_train)

  self._final_estimator.fit(Xt, y, **fit_params_last_step)


In [11]:
predictions = rf_model.predict(X_test)
predictions

array([0, 1, 1, ..., 0, 0, 0])

In [12]:
# Calculating a confusion matrix
cm = confusion_matrix(y_test, predictions) 

# Create a DataFram e from the confusion matrix
cm_df = pd.DataFrame(
cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])
cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,12985,1248
Actual 1,1221,2521


In [13]:
# Calculate Accuracy Score
acc_score = accuracy_score(y_test, predictions)
acc_score

0.8626425591098749

In [56]:
# Displaying Results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,12985,1248
Actual 1,1221,2521


Accuracy Score : 0.8626425591098749
Classification Report
              precision    recall  f1-score   support

           0       0.91      0.91      0.91     14233
           1       0.67      0.67      0.67      3742

    accuracy                           0.86     17975
   macro avg       0.79      0.79      0.79     17975
weighted avg       0.86      0.86      0.86     17975



In [17]:
# Export to pkl
import pickle
pickle.dump(rf_model, open('model.pkl', 'wb'))



In [55]:
rf_model = pickle.load(open('model.pkl','rb'))
print(rf_model.predict([[27, 95]]))

[1]


