In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math  
import sklearn.metrics  

In [2]:
# Load the raw table from disk.
dataset = pd.read_csv('SHARES_DATASET.csv')

# Every column except the last is a feature; the last column is the target.
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the first feature column (index 0) and pass every other
# column through unchanged; the encoded columns come first in the output.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
)

# Encode from the raw frame rather than from X itself: with `X = f(X)` a
# second run of this cell would one-hot encode an already-encoded matrix.
# On a first run the result is the same as before.
X = np.array(ct.fit_transform(dataset.iloc[:, :-1].values))


In [4]:
# Show the first row of the encoded feature matrix to check the column layout.
print(X[0])

[0.0 1.0 0.0 0.0 0.0 1541.0 1557.0 1564.65 1470.0 1474.95 1476.5 1499.5
 982492 577454]


In [5]:
# Feature columns of the raw frame without column 0 and without the last
# (target) column.
# NOTE(review): X_a is not referenced by any other cell visible here — confirm
# whether it is still needed.
X_a = dataset.iloc[:, 1:-1].to_numpy()

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [7]:
# Inspect the held-out feature matrix (NumPy abbreviates long arrays with "...").
print(X_test)

[[0.0 0.0 1.0 ... 2840.8 181855 134238]
 [0.0 0.0 0.0 ... 236.02 7236342 3286299]
 [0.0 1.0 0.0 ... 2658.47 185737 61458]
 ...
 [1.0 0.0 0.0 ... 540.34 6618167 3082818]
 [0.0 0.0 1.0 ... 2169.33 156789 102315]
 [0.0 0.0 0.0 ... 559.28 6111245 1504664]]


In [8]:
from sklearn.linear_model import LinearRegression

# Multiple linear regression fitted on the training split.
regressor_m = LinearRegression()
regressor_m.fit(X_train, y_train)

In [9]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 10 trees; random_state is fixed so refits are repeatable.
regressor_r = RandomForestRegressor(
    n_estimators=10,
    random_state=0,
)
regressor_r.fit(X_train, y_train)

In [10]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features with degree-2 polynomial terms, then fit an
# ordinary linear regression on the expanded matrix. Anything passed to
# regressor_P later must go through poly_reg.transform first.
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X_train)

regressor_P = LinearRegression()
regressor_P.fit(X_poly, y_train)

## Polynomial regression

In [11]:
# Predict on the test split; the features get the same polynomial expansion
# that was applied at fit time.
y_pred_P = regressor_P.predict(poly_reg.transform(X_test))

np.set_printoptions(precision=3)
# Column 0: predicted value, column 1: actual value.
print(np.column_stack((y_pred_P, y_test)))

[[0.657 0.738]
 [0.509 0.454]
 [0.448 0.331]
 ...
 [0.468 0.466]
 [0.579 0.653]
 [0.251 0.246]]


In [12]:
from sklearn.metrics import r2_score

# R² of the polynomial model on the test split.
r2_P = r2_score(y_true=y_test, y_pred=y_pred_P)
print(r2_P)

0.6601655384359879


In [13]:
# Root-mean-squared error of the polynomial model on the test split.
mse = sklearn.metrics.mean_squared_error(y_true=y_test, y_pred=y_pred_P)
rmse = math.sqrt(mse)
print("The difference between actual and predicted values", rmse)

The difference between actual and predicted values 0.09831610819356011


## Random forest regression

In [14]:
# Random forest predictions for the test split.
y_pred_r = regressor_r.predict(X_test)

np.set_printoptions(precision=2)
# Column 0: predicted value, column 1: actual value.
print(np.column_stack((y_pred_r, y_test)))

[[0.71 0.74]
 [0.43 0.45]
 [0.31 0.33]
 ...
 [0.46 0.47]
 [0.68 0.65]
 [0.24 0.25]]


In [15]:
from sklearn.metrics import r2_score

# R² of the random forest model on the test split.
r2_RF = r2_score(y_true=y_test, y_pred=y_pred_r)
print(r2_RF)

0.9810681542081153


In [16]:
# Root-mean-squared error of the random forest model on the test split.
mse = sklearn.metrics.mean_squared_error(y_true=y_test, y_pred=y_pred_r)
rmse = math.sqrt(mse)
print("The difference between actual and predicted values", rmse)

The difference between actual and predicted values 0.023205313998161108


In [17]:
# Spot-check the random forest on one hand-written, already-encoded row
# (14 values: the one-hot columns first, then the numeric columns).
print(
    regressor_r.predict(
        [[0, 1, 0, 0, 0, 1541, 1557, 1564.65, 1470, 1474.95, 1476.5, 1499.5, 982492, 577454]]
    )
)

[0.6]


## Multiple regression 

In [18]:
# Multiple linear regression predictions for the test split.
y_pred_m = regressor_m.predict(X_test)

np.set_printoptions(precision=2)
# Column 0: predicted value, column 1: actual value.
print(np.column_stack((y_pred_m, y_test)))

[[0.59 0.74]
 [0.52 0.45]
 [0.5  0.33]
 ...
 [0.45 0.47]
 [0.58 0.65]
 [0.28 0.25]]


In [19]:
from sklearn.metrics import r2_score

# R² of the multiple linear regression model on the test split.
r2_M = r2_score(y_true=y_test, y_pred=y_pred_m)
print(r2_M)

0.5437102664705493


In [20]:
# Root-mean-squared error of the multiple linear regression model on the test split.
mse = sklearn.metrics.mean_squared_error(y_true=y_test, y_pred=y_pred_m)
rmse = math.sqrt(mse)
print("The difference between actual and predicted values", rmse)

The difference between actual and predicted values 0.11392297531600681


# Prediction

In [21]:
# Score one already-encoded sample row with whichever model achieved the
# highest R² on the test split.
sample_row = [[0, 1, 0, 0, 0, 1541, 1557, 1564.65, 1470, 1474.95, 1476.5, 1499.5, 982492, 577454]]

# `>=` instead of `>`: with strict comparisons a tie between the linear and
# random-forest scores fell through to the polynomial branch even when the
# polynomial model scored lower.
if r2_M >= r2_RF and r2_M >= r2_P:
    print(regressor_m.predict(sample_row))
elif r2_RF >= r2_P:
    print(regressor_r.predict(sample_row))
else:
    # regressor_P was fitted on poly_reg's expanded features, so the raw row
    # must go through the same transform first; passing it directly fails
    # with a feature-count mismatch.
    print(regressor_P.predict(poly_reg.transform(sample_row)))

[0.6]
