In [1]:
import warnings
warnings.filterwarnings('ignore')

# Regression

In [2]:
import pandas as pd

# Ten daily observations, one tuple per day:
# (date as MM-DD-YYYY text, cloudy flag 'YES'/'NO', temperature reading).
observations = (
    ('01-01-2001', 'YES', 14.3),
    ('01-02-2001', 'NO', 13.7),
    ('01-03-2001', 'NO', 13.6),
    ('01-04-2001', 'YES', 14.3),
    ('01-05-2001', 'NO', 14.2),
    ('01-06-2001', 'YES', 12.8),
    ('01-07-2001', 'NO', 14.7),
    ('01-08-2001', 'NO', 11.3),
    ('01-09-2001', 'NO', 11.7),
    ('01-10-2001', 'NO', 12.1),
)

# Transpose the rows into one sequence per column, then build the frame
# column by column; the dict insertion order fixes the column order.
obsDates, obsCloudy, obsTemperatures = zip(*observations)
df = pd.DataFrame({
    'Date': list(obsDates),
    'Cloudy': list(obsCloudy),
    'Temperature': list(obsTemperatures),
})

df

Unnamed: 0,Date,Cloudy,Temperature
0,01-01-2001,YES,14.3
1,01-02-2001,NO,13.7
2,01-03-2001,NO,13.6
3,01-04-2001,YES,14.3
4,01-05-2001,NO,14.2
5,01-06-2001,YES,12.8
6,01-07-2001,NO,14.7
7,01-08-2001,NO,11.3
8,01-09-2001,NO,11.7
9,01-10-2001,NO,12.1


In [3]:
import datetime
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Feature Engineering
# Parse the date strings once, with an explicit month-day-year format, so the
# result never depends on format inference for ambiguous values such as
# '01-02-2001' (2 January here, not 1 February).
parsedDates = pd.to_datetime(df['Date'], format='%m-%d-%Y')

# Get Month and Day of the Year
df['Month'] = parsedDates.dt.month

# 'DayOfYear' is the number of whole days elapsed since 1 January 2001, so it
# is zero-based (1 January -> 0).
# referenceDate stays an array with one entry per row because the prediction
# cell reads referenceDate[0]; the subtraction itself only needs the scalar.
referenceDay = datetime.datetime(2001, 1, 1)
referenceDate = np.array([referenceDay] * len(df))
df['DayOfYear'] = (parsedDates - referenceDay).dt.days

# Convert Cloudy to numbers
# The encoder is fitted on the labels present in the column and replaces them
# with integer codes; leC is kept so new labels can be encoded the same way.
# NOTE: this overwrites df['Cloudy'] in place. Re-running this cell without
# first rebuilding df refits leC on the integer codes, after which
# leC.transform(['NO']) no longer recognises the original labels.
leC = LabelEncoder()
df['Cloudy'] = leC.fit_transform(df['Cloudy'])

df

Unnamed: 0,Date,Cloudy,Temperature,Month,DayOfYear
0,01-01-2001,1,14.3,1,0
1,01-02-2001,0,13.7,1,1
2,01-03-2001,0,13.6,1,2
3,01-04-2001,1,14.3,1,3
4,01-05-2001,0,14.2,1,4
5,01-06-2001,1,12.8,1,5
6,01-07-2001,0,14.7,1,6
7,01-08-2001,0,11.3,1,7
8,01-09-2001,0,11.7,1,8
9,01-10-2001,0,12.1,1,9


In [4]:
# Feature matrix (engineered date features plus the encoded cloudy flag) and
# the regression target.
X = df[['Month', 'DayOfYear', 'Cloudy']]
y = df['Temperature']

from sklearn.ensemble import RandomForestRegressor
from dateutil import parser
import numpy as np

# Build Model
# A random forest draws bootstrap samples at random; pinning random_state
# makes the fitted model, and therefore the prediction printed below,
# identical on every Restart & Run All.
rfr = RandomForestRegressor(random_state=42)
rfr.fit(X, y)

# Prepare Test Data
# One unseen observation: [date as MM-DD-YYYY text, cloudy label].
testData = ['01-22-2001', 'NO']

# Parse the date once and reuse it for both date-derived features.
testDate = parser.parse(testData[0])

# Same feature order as the training columns: Month, DayOfYear, Cloudy.
# int(...) turns the encoded label into a plain Python int so the printed
# list reads [1, 21, 0] regardless of how NumPy formats its scalar types.
Xtest = [
    testDate.month,
    (testDate - referenceDate[0]).days,
    int(leC.transform([testData[1]])[0]),
]
print('Transformed test data:', Xtest)

# Make the prediction
# The model was fitted on a DataFrame, so pass a one-row DataFrame carrying the
# same column names; scikit-learn warns when predict() receives an unnamed
# array after being fitted with feature names.
XtestFrame = pd.DataFrame([Xtest], columns=X.columns)
prediction = rfr.predict(XtestFrame)

# predict() returns a length-1 array. Format its single element explicitly:
# passing the array itself to %f relies on NumPy's deprecated implicit
# array-to-scalar conversion.
print('\nPredicted temperature on %s = %f' % (testData[0], prediction[0]))

Transformed test data: [1, 21, 0]

Predicted temperature on 01-22-2001 = 12.040000
