In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import requests
import json
import time
from datetime import datetime 
import sqlalchemy as sqla
from sqlalchemy import create_engine
import csv
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

In [None]:
# Read the cleaned training set and keep every row whose Station is not 507.
# NOTE(review): the reason station 507 is excluded is not shown in this notebook — confirm.
df = (
    pd.read_csv('cleaned_training_dataset_2.csv')
    .loc[lambda readings: readings['Station'].ne(507)]
)

In [None]:
# Split the training data into one DataFrame per station, keyed by the station
# id rendered as a string (e.g. "42").
#
# A single groupby pass replaces the previous per-station boolean filter and the
# deprecated `Series.ravel()` call. `sort=True` keeps the keys in ascending
# station order, and each group keeps its rows in their original order with
# their original index.
# Note: groupby skips rows whose Station is missing (NaN); such rows could never
# have matched the old `df['Station'] == i` filter either.
stationDataframes = {
    str(station_id): station_rows
    for station_id, station_rows in df.groupby("Station", sort=True)
}

In [None]:
# For every station: build the feature matrix, fit one random-forest regressor
# for 'Available Bikes' and one for 'Available Stands', pickle both models to
# the working directory, and print hold-out metrics.
for key, value in stationDataframes.items():
    # One-hot encode the listed columns; every other column is passed through
    # unchanged by get_dummies.
    DF = pd.get_dummies(value, columns=['Station', 'Weather', 'Description', 'Day', 'Hour'])

    # Features are every encoded column except the two prediction targets.
    X = DF.drop(['Available Bikes', 'Available Stands'], axis=1)
    y_bikes = DF['Available Bikes']
    y_stands = DF['Available Stands']

    # Splitting all three arrays in one call keeps the feature rows and both
    # targets aligned on the same 80/20 partition; the fixed seed makes the
    # partition repeatable.
    (
        X_train, X_test,
        y_train_bikes, y_test_bikes,
        y_train_stands, y_test_stands,
    ) = train_test_split(X, y_bikes, y_stands, test_size=0.2, random_state=36)

    bikesModel = RandomForestRegressor(n_estimators=150, random_state=36)
    standsModel = RandomForestRegressor(n_estimators=150, random_state=36)

    bikesModel.fit(X_train, y_train_bikes)
    standsModel.fit(X_train, y_train_stands)

    bikesFile = f"bikes_station_{key}.pkl"
    standsFile =f"stands_station_{key}.pkl"

    # Context managers guarantee each file is flushed and closed, even if
    # pickling raises; the bare open() calls used before leaked two handles
    # per station.
    with open(bikesFile, 'wb') as bikes_handle:
        pickle.dump(bikesModel, bikes_handle)
    with open(standsFile, 'wb') as stands_handle:
        pickle.dump(standsModel, stands_handle)

    # Evaluate both models on the held-out 20%.
    y_pred_bikes = bikesModel.predict(X_test)
    y_pred_stands = standsModel.predict(X_test)
    mse_bikes = mean_squared_error(y_test_bikes, y_pred_bikes)
    mse_stands = mean_squared_error(y_test_stands, y_pred_stands)
    print(f'Station {key} MSE for available bikes:', mse_bikes)
    print(f'Station {key} MSE for available stands:', mse_stands)
    # The two unlabeled lines are R^2 scores: stands first, then bikes.
    print(r2_score(y_test_stands, y_pred_stands))
    print(r2_score(y_test_bikes, y_pred_bikes))