In [3]:
# Set 7 Predictions using Scikit-Learn

# 7.1 Build Model with California Housing Data for house price prediction.
# Train at least 2 model regression models. Print their accuracy.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

# Load the data
from sklearn.datasets import fetch_california_housing

california = fetch_california_housing()
X = california.data
y = california.target

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Train the model
# Linear Regression
linear_regressor = LinearRegression()
linear_regressor.fit(X_train, y_train)
y_pred = linear_regressor.predict(X_test)
print("Linear Regression Mean Squared Error: ", mean_squared_error(y_test, y_pred))
print("Accuracy: ", linear_regressor.score(X_test, y_test))
print("\n")
# Random Forest
random_forest_regressor = RandomForestRegressor()
random_forest_regressor.fit(X_train, y_train)
y_pred = random_forest_regressor.predict(X_test)
print("Random Forest Mean Squared Error: ", mean_squared_error(y_test, y_pred))
print("Accuracy: " , random_forest_regressor.score(X_test, y_test))
print("\n")
# Decision Tree
decision_tree_regressor = DecisionTreeRegressor()
decision_tree_regressor.fit(X_train, y_train)
y_pred = decision_tree_regressor.predict(X_test)
print("Decision Tree Mean Squared Error: ", mean_squared_error(y_test, y_pred))
print("Accuracy: ", decision_tree_regressor.score(X_test, y_test))
print("\n")

Linear Regression Mean Squared Error:  0.5289841670367241
Accuracy:  0.5943232652466177


Random Forest Mean Squared Error:  0.2610799333019674
Accuracy:  0.7997784027357855


Decision Tree Mean Squared Error:  0.5336582046743216
Accuracy:  0.590738756588151




In [4]:
# 7.2 Build Model with MNIST dataset for handwritten digit prediction.
# Train at least 2 classification models. Print their accuracy and confusion matrix.
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import fetch_openml

# Load the MNIST dataset
mnist = fetch_openml('mnist_784')

X = mnist.data
y = mnist.target

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Train the model
# Random Forest
random_forest_classifier = RandomForestClassifier()
random_forest_classifier.fit(X_train, y_train)
y_pred = random_forest_classifier.predict(X_test)
print("Random Forest Accuracy: ", random_forest_classifier.score(X_test, y_test))

# Decision Tree
decision_tree_classifier = DecisionTreeClassifier()
decision_tree_classifier.fit(X_train, y_train)
y_pred = decision_tree_classifier.predict(X_test)
print("Decision Tree Accuracy: ", decision_tree_classifier.score(X_test, y_test))

# Confusion Matrix
from sklearn.metrics import confusion_matrix
print("Random Forest Confusion Matrix: ")
print(confusion_matrix(y_test, y_pred))
print("\n")
print("Decision Tree Confusion Matrix: ")
print(confusion_matrix(y_test, y_pred))
print("\n")



Random Forest Accuracy:  0.9655714285714285
Decision Tree Accuracy:  0.8702857142857143
Random Forest Confusion Matrix: 
[[1275    3   19    8    8   21   28    3   13    9]
 [   1 1517   14   15    5    3    2    4   16    3]
 [  11   17 1234   42   14   13   16   34   42   20]
 [   3   19   44 1181    8   70    9   25   47   29]
 [   8    1   18    9 1179    6   22   13   25   69]
 [  30   13   25   40   11 1008   31   15   36   22]
 [  25    5   21    4   18   27 1237    5   32   13]
 [  14   11   37   22   14   13    1 1304   13   29]
 [  26   22   34   42   19   39   26   15 1090   55]
 [   6    7   11   23   63   28    9   29   26 1159]]


Decision Tree Confusion Matrix: 
[[1275    3   19    8    8   21   28    3   13    9]
 [   1 1517   14   15    5    3    2    4   16    3]
 [  11   17 1234   42   14   13   16   34   42   20]
 [   3   19   44 1181    8   70    9   25   47   29]
 [   8    1   18    9 1179    6   22   13   25   69]
 [  30   13   25   40   11 1008   31   15   36  