In [1]:
import torch
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn

In [2]:
data = pd.read_csv('./Datasets/bike_sharing.csv', index_col=0)   
        

In [3]:
data

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,spring,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,spring,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,spring,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,spring,0,1,0,2,1,1,0.200000,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,spring,0,1,0,3,1,1,0.226957,0.229270,0.436957,0.186900,82,1518,1600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,727,2012-12-27,spring,1,12,0,4,1,2,0.254167,0.226642,0.652917,0.350133,247,1867,2114
727,728,2012-12-28,spring,1,12,0,5,1,2,0.253333,0.255046,0.590000,0.155471,644,2451,3095
728,729,2012-12-29,spring,1,12,0,6,0,2,0.253333,0.242400,0.752917,0.124383,159,1182,1341
729,730,2012-12-30,spring,1,12,0,0,0,1,0.255833,0.231700,0.483333,0.350754,364,1432,1796


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 731 entries, 0 to 730
Data columns (total 16 columns):
instant       731 non-null int64
dteday        731 non-null object
season        731 non-null object
yr            731 non-null int64
mnth          731 non-null int64
holiday       731 non-null int64
weekday       731 non-null int64
workingday    731 non-null int64
weathersit    731 non-null int64
temp          731 non-null float64
atemp         731 non-null float64
hum           731 non-null float64
windspeed     731 non-null float64
casual        731 non-null int64
registered    731 non-null int64
cnt           731 non-null int64
dtypes: float64(4), int64(10), object(2)
memory usage: 97.1+ KB


In [5]:
data.shape

(731, 16)

In [6]:
#one hot encoding for season colomn
data = pd.get_dummies(data, columns= ['season'])
data.sample(5)

Unnamed: 0,instant,dteday,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt,season_fall,season_spring,season_summer,season_winter
712,713,2012-12-13,1,12,0,4,1,1,0.295833,0.294188,0.485833,0.174129,425,5107,5532,0,0,0,1
84,85,2011-03-26,0,3,0,6,0,1,0.265833,0.257571,0.394167,0.209571,981,1515,2496,0,0,1,0
169,170,2011-06-19,0,6,0,0,0,2,0.699167,0.645846,0.666667,0.102,1639,3105,4744,0,0,1,0
707,708,2012-12-08,1,12,0,6,0,2,0.381667,0.389508,0.91125,0.101379,1153,4429,5582,0,0,0,1
127,128,2011-05-08,0,5,0,0,0,1,0.528333,0.518933,0.631667,0.074637,1401,2932,4333,0,0,1,0


In [7]:
columns = ['registered', 'holiday', 'weekday', 
           'weathersit', 'temp', 'atemp',
           'season_fall', 'season_spring', 
           'season_summer', 'season_winter']


features = data[columns]

In [8]:
features.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 731 entries, 0 to 730
Data columns (total 10 columns):
registered       731 non-null int64
holiday          731 non-null int64
weekday          731 non-null int64
weathersit       731 non-null int64
temp             731 non-null float64
atemp            731 non-null float64
season_fall      731 non-null uint8
season_spring    731 non-null uint8
season_summer    731 non-null uint8
season_winter    731 non-null uint8
dtypes: float64(2), int64(4), uint8(4)
memory usage: 42.8 KB


In [44]:

labels = data[['cnt']]

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train, x_test, Y_train, y_test = train_test_split(features,
                                                    labels,
                                                    test_size=0.2)

In [12]:
#convert to pytorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype = torch.float)
x_test_tensor = torch.tensor(x_test.values, dtype = torch.float)

Y_train_tensor = torch.tensor(Y_train.values, dtype = torch.float)
y_test_tensor = torch.tensor(y_test.values, dtype = torch.float)

In [13]:
X_train_tensor.shape

torch.Size([584, 10])

In [14]:
Y_train_tensor.shape

torch.Size([584, 1])

In [15]:
import torch.utils.data as data_utils 

In [16]:
#torch.utils.data.TensorDataset(*tensors) - Dataset wrapping tensors.Each sample will be retrieved by indexing tensors along the first dimension.

train_data = data_utils.TensorDataset(X_train_tensor, Y_train_tensor)

In [17]:
#torch.utils.data.DataLoader - Combines a dataset and a sampler, and provides single- or multi-process iterators over the dataset.
#torch.utils.data.DataLoader provides
#Batching the data
#Shuffling the data
#Load the data in parallel using multiprocessing workers

In [18]:
#divided into batches
train_loader = data_utils.DataLoader(train_data, batch_size=100, shuffle=True)

In [19]:
len(train_loader)

6

In [20]:
features_batch, target_batch = iter(train_loader).next()

In [21]:
features_batch.shape

torch.Size([100, 10])

In [22]:
target_batch.shape

torch.Size([100, 1])

In [23]:
inp = X_train_tensor.shape[1]
out = 1
hid = 10
loss_fn = torch.nn.MSELoss()

In [24]:
model = torch.nn.Sequential(torch.nn.Linear(inp, hid),
#                             torch.nn.ReLU(),
#                             torch.nn.Dropout(p=0.2),
                            torch.nn.Linear(hid, out))

In [28]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
total_step = len(train_loader)

num_epochs = 10000

for epoch in range(num_epochs + 1):
    for i, (features, target) in enumerate(train_loader):
        
        output = model(features)
        loss = loss_fn(output, target)
        
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()
        
        if epoch % 2000 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/10000], Step [1/6], Loss: 17771176.0000
Epoch [1/10000], Step [2/6], Loss: 18823804.0000
Epoch [1/10000], Step [3/6], Loss: 16960736.0000
Epoch [1/10000], Step [4/6], Loss: 16741245.0000
Epoch [1/10000], Step [5/6], Loss: 17867840.0000
Epoch [1/10000], Step [6/6], Loss: 17924044.0000
Epoch [2001/10000], Step [1/6], Loss: 312838.9375
Epoch [2001/10000], Step [2/6], Loss: 331863.7812
Epoch [2001/10000], Step [3/6], Loss: 314526.0938
Epoch [2001/10000], Step [4/6], Loss: 421822.6250
Epoch [2001/10000], Step [5/6], Loss: 378884.3750
Epoch [2001/10000], Step [6/6], Loss: 403173.5625
Epoch [4001/10000], Step [1/6], Loss: 232031.0156
Epoch [4001/10000], Step [2/6], Loss: 316376.7812
Epoch [4001/10000], Step [3/6], Loss: 340217.9375
Epoch [4001/10000], Step [4/6], Loss: 376278.0625
Epoch [4001/10000], Step [5/6], Loss: 298444.3125
Epoch [4001/10000], Step [6/6], Loss: 320907.4375
Epoch [6001/10000], Step [1/6], Loss: 423405.7812
Epoch [6001/10000], Step [2/6], Loss: 297299.1562
Epoch 

In [29]:
model.eval()

with torch.no_grad():
    y_pred = model(x_test_tensor)

In [30]:
sample = x_test.iloc[45]
sample

registered       1437.000000
holiday             0.000000
weekday             6.000000
weathersit          2.000000
temp                0.384167
atemp               0.378779
season_fall         0.000000
season_spring       1.000000
season_summer       0.000000
season_winter       0.000000
Name: 63, dtype: float64

In [31]:
sample_tensor = torch.tensor(sample.values, 
                             dtype = torch.float)
sample_tensor

tensor([1.4370e+03, 0.0000e+00, 6.0000e+00, 2.0000e+00, 3.8417e-01, 3.7878e-01,
        0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00])

In [32]:
with torch.no_grad():
    y_pred = model(sample_tensor)

print("Predicted count : ", (y_pred.item()))
print("Actual count : ", (y_test.iloc[45]))

Predicted count :  1780.1666259765625
Actual count :  cnt    2077
Name: 63, dtype: int64


In [33]:
with torch.no_grad():
    y_pred_tensor = model(x_test_tensor)

In [34]:
y_pred = y_pred_tensor.detach().numpy()

y_pred.shape

(147, 1)

In [35]:
compare_df = pd.DataFrame({'actual': np.squeeze(y_test.values), 'predicted': np.squeeze(y_pred)})

compare_df.sample(10)

Unnamed: 0,actual,predicted
70,4639,4965.233398
26,3333,3464.996826
93,6624,5334.277832
75,2277,2350.21875
78,3285,3670.842041
95,5698,6079.832031
48,4985,4971.19043
115,4187,4241.695801
25,3624,3876.547363
87,6370,6906.423828


In [36]:
sklearn.metrics.r2_score(y_test, y_pred)

0.9131463485308102

In [39]:
torch.save(model, 'my_model')

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [40]:
#Loading an already saved model
#We can load a saved model using the torch.load() method

In [41]:
saved_model = torch.load('my_model')

In [42]:
y_pred_tensor = saved_model(x_test_tensor)

In [43]:
y_pred = y_pred_tensor.detach().numpy()