# 03_07: Solution: Build Your First Machine Learning Model

### Import the necessary libraries

In [1]:
# Step 1: Import the necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

### Load the data

In [2]:
# Step 2: Read the CSV file 'data.csv' into a pandas DataFrame
data = pd.read_csv(filepath_or_buffer='data.csv')

In [3]:
# Show the loaded DataFrame as the cell output to inspect its columns and values
data

Unnamed: 0,study_hours,attendance_rate,pass
0,5.993428,60.846293,0
1,4.723471,70.793547,1
2,6.295377,71.572855,1
3,8.046060,66.977227,0
4,4.531693,73.387143,1
...,...,...,...
95,2.072970,78.853174,0
96,5.592241,66.161426,0
97,5.522111,76.537251,1
98,5.010227,75.582087,1


In [4]:
# Separate the label column from the predictor columns
y = data['pass']               # Target: the 'pass' column
X = data.drop(columns='pass')  # Features: every column except 'pass'

### Split the data for training and testing

In [5]:
# Step 3: Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Train the model

In [6]:
# Step 4: Create a logistic regression classifier that uses the liblinear solver
model = LogisticRegression(solver='liblinear')
# Fit the classifier on the training split
model.fit(X=X_train, y=y_train)

### Make predictions

In [7]:
# Step 5: Use the fitted model to predict a label for every row of the test split
y_pred = model.predict(X=X_test)

### Print the results

In [8]:
# Step 6: Put the true and predicted test labels side by side and print them
label_columns = {
    'True Labels': y_test,
    'Predicted Labels': y_pred,
}
results = pd.DataFrame(label_columns)
print(results)

    True Labels  Predicted Labels
83            0                 0
53            1                 1
70            0                 1
45            0                 0
44            0                 0
39            0                 0
22            1                 0
80            1                 0
10            0                 0
0             0                 1
18            0                 0
30            0                 0
73            1                 1
33            0                 0
90            1                 0
4             1                 0
76            1                 0
77            1                 0
12            1                 1
31            1                 1


### Test on a single data point

In [9]:
# Test the model on a single data point: 6 study hours and 94% attendance.
# The inputs are defined once so the printed message always matches the values predicted on.
study_hours, attendance_rate = 6, 94
single_data_point = pd.DataFrame({'study_hours': [study_hours], 'attendance_rate': [attendance_rate]})
single_prediction = model.predict(single_data_point)

# A predicted label of 1 is reported as "Pass", anything else as "Fail"
outcome = "Pass" if single_prediction[0] == 1 else "Fail"
print(f'Prediction for study_hours={study_hours} and attendance_rate={attendance_rate}: {outcome}')

Prediction for study_hours=6 and attendance_rate=94: Pass


In [10]:
# Test the model on a single data point: 0 study hours and 20% attendance.
# The inputs are defined once so the printed message always matches the values predicted on.
study_hours, attendance_rate = 0, 20
single_data_point = pd.DataFrame({'study_hours': [study_hours], 'attendance_rate': [attendance_rate]})
single_prediction = model.predict(single_data_point)

# A predicted label of 1 is reported as "Pass", anything else as "Fail"
outcome = "Pass" if single_prediction[0] == 1 else "Fail"
print(f'Prediction for study_hours={study_hours} and attendance_rate={attendance_rate}: {outcome}')

Prediction for study_hours=0 and attendance_rate=20: Fail


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Evaluate the test-set predictions: accuracy, precision, recall and F1 score
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Accuracy is printed as a percentage; the other metrics are printed as fractions
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
# No space inside the format spec: ' .2f' is the space sign option and would print an extra blank
print(f'F1 Score: {f1:.2f}')
