In [1]:

# We can easily train a model with numeric data and once it has learned the relationships between the 
# different datapoints, we can use the trained model to make predictions into the future.


In [1]:

import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
 
 
# Initialise the toy dataset: one record per year holding three numeric
# features and a class label ('Size') that is stored as a string.
_FIELDS = ('Year', 'Airport', 'Casino', 'Stadium', 'Size')
_ROWS = [
    (2020, 2000, 5000, 9000, '1'),
    (2019, 3000, 4000, 12000, '2'),
    (2018, 5000, 9000, 10000, '2'),
    (2017, 5000, 10000, 15000, '3'),
]
data = [dict(zip(_FIELDS, row)) for row in _ROWS]

# Index by year so that only the feature/label columns remain as data columns.
df = pd.DataFrame(data).set_index(['Year'])
df


Unnamed: 0_level_0,Airport,Casino,Stadium,Size
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,2000,5000,9000,1
2019,3000,4000,12000,2
2018,5000,9000,10000,2
2017,5000,10000,15000,3


In [3]:

# Features are the three numeric columns; the target is the 'Size' class label.
X = df[['Airport', 'Casino', 'Stadium']]
y = df['Size']

# With only four rows, test_size=0.2 holds out a single row, so the accuracy
# printed below can only be 0.0 or 1.0 -- treat it as a smoke test, not as a
# meaningful estimate of model quality.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the ensemble as well as the split so that a fresh "Restart & Run All"
# rebuilds the same model.
clf = AdaBoostClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Previously the score was computed mid-cell and silently discarded (only a
# cell's last expression is displayed); keep it in a variable and print it.
accuracy = accuracy_score(y_test, y_pred)
print('Hold-out accuracy:', accuracy)


# now we can have our trained model make a prediction
# The model was fitted on a DataFrame, so the observation is wrapped in a
# DataFrame with the same column names; a bare list makes scikit-learn warn
# that X does not have valid feature names.
new_observation = pd.DataFrame([[5000, 10000, 15000]], columns=X.columns)
print(clf.predict(new_observation))


# Result:
['3']


# Perfect!!


['3']




['3']

In [4]:

# We got exactly what we would expect in the exercise above. Now, let's see how the model can make a prediction on
# data that it has not yet been exposed to.

# The classifier was fitted on a DataFrame, so pass the new observation as a
# DataFrame with the same column names (spelled out here so the cell does not
# depend on any other variable still holding the feature frame). A bare list
# makes scikit-learn warn that X does not have valid feature names.
unseen_observation = pd.DataFrame(
    [[5000, 10000, 10000]],
    columns=['Airport', 'Casino', 'Stadium'],
)
print(clf.predict(unseen_observation))


# Result:
['2']


# Nice!!


['2']




In [None]:

# Making predictions on labeled data requires a little more effort, because we have to convert the text labels to
# numeric category codes. Machines can easily interpret numbers, but it's much more difficult for non-humans to
# understand the difference between small, medium and large, for instance.
# So it’s not quite as straightforward as making predictions on numeric data, but it’s not too labor intensive.


In [5]:

import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_moons
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier

from sklearn.preprocessing import LabelEncoder, OrdinalEncoder


# Initialise the toy dataset again, this time with a human-readable text
# label ('Small' / 'Medium' / 'Large') in the 'Size' column.
_FIELDS = ('Year', 'Airport', 'Casino', 'Stadium', 'Size')
_ROWS = [
    (2020, 2000, 5000, 9000, 'Small'),
    (2019, 3000, 4000, 12000, 'Medium'),
    (2018, 5000, 9000, 10000, 'Medium'),
    (2017, 5000, 10000, 15000, 'Large'),
]
data = [dict(zip(_FIELDS, row)) for row in _ROWS]

# Index by year so that only the feature/label columns remain as data columns.
df = pd.DataFrame(data).set_index(['Year'])
df


Unnamed: 0_level_0,Airport,Casino,Stadium,Size
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020,2000,5000,9000,Small
2019,3000,4000,12000,Medium
2018,5000,9000,10000,Medium
2017,5000,10000,15000,Large


In [6]:

# FunctionTransformer comes from sklearn.preprocessing, which this notebook
# already imports from; it is imported here so the cell runs on its own.
from sklearn.preprocessing import FunctionTransformer

# The three features are already numeric, so they need no encoding. An
# OrdinalEncoder would treat every distinct number as a category and raise
# "unknown categories" for any value it had not seen while fitting, which
# defeats the purpose of predicting on new data. A FunctionTransformer with
# no function is a pass-through, so later cells can keep calling
# feature_encoder.transform(...) unchanged.
feature_encoder = FunctionTransformer()
# Only the text target needs converting: LabelEncoder maps each label to an
# integer code, and inverse_transform maps predictions back to the label.
target_encoder = LabelEncoder()

X = df.loc[:, ['Airport', 'Casino', 'Stadium']]
y = df.loc[:, "Size"]

# Hand the model a plain float array, so that predicting from plain lists
# later does not trigger feature-name warnings.
X = feature_encoder.fit_transform(X.to_numpy(dtype=float))
y = target_encoder.fit_transform(y)


# With only four rows, test_size=0.2 holds out a single row.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the ensemble as well as the split so a fresh run rebuilds the same model.
clf = AdaBoostClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)


In [7]:

# Inference only -- the classifier was fitted in the previous cell.
# The observation is given in the column order used for fitting:
# Airport, Casino, Stadium.
ex_x = [
    [5000, 10000, 15000],
]

# features -> model input space
ex_x_encoded = feature_encoder.transform(ex_x)
# model input -> predicted integer class code(s)
ex_y_encoded = clf.predict(ex_x_encoded)
# integer code(s) -> original text label(s)
ex_y = target_encoder.inverse_transform(ex_y_encoded)

print(ex_y)


# Result:
['Large']


# Perfect!!


['Large']




['Large']

In [9]:

# Once more, check the model on a combination of feature values that does
# not appear as a row in the dataset (column order: Airport, Casino, Stadium).

ex_x = [
    [5000, 10000, 10000],
]

# features -> model input space
ex_x_encoded = feature_encoder.transform(ex_x)
# model input -> predicted integer class code(s)
ex_y_encoded = clf.predict(ex_x_encoded)
# integer code(s) -> original text label(s)
ex_y = target_encoder.inverse_transform(ex_y_encoded)

print(ex_y)


# Result:
['Medium']


# Perfect!!

['Medium']


