In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import OneVsOneClassifier

from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score


In [2]:
# Load the data
df = pd.read_csv('advertising.csv')

In [3]:
# Remove 'Ad Topic Line' and 'Timestamp' features
df = df.drop(['Ad Topic Line', 'Timestamp'], axis=1)

In [4]:

le_country = LabelEncoder()
le_city = LabelEncoder()

df['Country'] = le_country.fit_transform(df['Country'])
df['City'] = le_city.fit_transform(df['City'])


In [5]:
df.head()


Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,City,Male,Country,Clicked on Ad
0,68.95,35,61833.9,256.09,961,0,215,0
1,80.23,31,68441.85,193.77,903,1,147,0
2,69.47,26,59785.94,236.5,111,0,184,0
3,74.15,29,54806.18,245.89,939,1,103,0
4,68.37,35,73889.99,225.58,805,0,96,0


In [6]:
# Split between featues as X and target as y

X = df.drop('Clicked on Ad', axis=1)
y = df['Clicked on Ad']


In [7]:
# Apply StandardScaler to the input features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [8]:
# Set the data into training and test sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
# Set the model
model = SVC()

#Use One-vs-Rest (OvA) strategy
#model = OneVsRestClassifier(SVC())

#Use One-vs-One (OvO) strategy
#model = OneVsOneClassifier(SVC())

In [10]:
# model fit for the data train
model.fit(X_train, y_train)

In [11]:
# Make predictions on the test data
model_predict = model.predict(X_test)


In [12]:
# Calculate accuracy
accuracy = accuracy_score(y_test, model_predict)
print("\nAccuracy:", accuracy)


Accuracy: 0.945


In [13]:
# Run precision, recall, and f1 score

# Generate the classification report
report = classification_report(y_test, model_predict)
print("\nClassification Report:\n", report)

# Extract precision, recall, and F1-score from the report
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(y_test, model_predict)
recall = recall_score(y_test, model_predict)
f1 = f1_score(y_test, model_predict)

print("\nPrecision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)


Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.97      0.94        89
           1       0.97      0.93      0.95       111

    accuracy                           0.94       200
   macro avg       0.94      0.95      0.94       200
weighted avg       0.95      0.94      0.95       200


Precision: 0.9716981132075472
Recall: 0.9279279279279279
F1-Score: 0.9493087557603687


In [15]:
def get_new_data_point():
    """Prompts the user to input a new data point and returns it as a list."""
    
    # Print available countries and cities for reference
    print("\nAvailable countries:", sorted(le_country.classes_))
    print("Available cities:", sorted(le_city.classes_))
    
    while True:
        try:
            # Get numerical inputs
            daily_time_spent_on_site = float(input("\nEnter Daily Time Spent on Site: "))
            age = float(input("Enter Age: "))
            area_income = float(input("Enter Area Income: "))
            daily_internet_usage = float(input("Enter Daily Internet Usage: "))
            male = int(input("Enter Male (1 for Male, 0 for Female): "))
            
            # Get categorical inputs with validation
            country = input("Enter Country: ")
            city = input("Enter City: ")
            
            # Validate country and city
            if country not in le_country.classes_:
                raise ValueError(f"Invalid country. Please choose from: {', '.join(sorted(le_country.classes_))}")
            if city not in le_city.classes_:
                raise ValueError(f"Invalid city. Please choose from: {', '.join(sorted(le_city.classes_))}")
            
            # Transform categorical variables
            country_encoded = le_country.transform([country])[0]
            city_encoded = le_city.transform([city])[0]
            
            # Create DataFrame with proper column names matching the training data
            new_data = pd.DataFrame({
                'Daily Time Spent on Site': [daily_time_spent_on_site],
                'Age': [age],
                'Area Income': [area_income],
                'Daily Internet Usage': [daily_internet_usage],
                'City': [city_encoded],  # Note: City comes before Male in the original data
                'Male': [male],
                'Country': [country_encoded]
            })
            
            return new_data
            
        except ValueError as e:
            print(f"\nError: {str(e)}")
            retry = input("Do you want to try again? (y/n): ")
            if retry.lower() != 'y':
                return None
        except Exception as e:
            print(f"\nAn unexpected error occurred: {str(e)}")
            retry = input("Do you want to try again? (y/n): ")
            if retry.lower() != 'y':
                return None

# Example usage with better feedback
while True:
    print("\n=== New Prediction ===")
    new_data = get_new_data_point()
    
    if new_data is None:
        print("\nExiting prediction mode.")
        break
        
    try:
        # Scale the new data
        new_data_scaled = scaler.transform(new_data)
        
        # Make prediction
        prediction = model.predict(new_data_scaled)[0]
        probabilities = model.predict_proba(new_data_scaled)[0]
        
        print("\nPrediction Results:")
        print(f"Clicked on Ad: {'Yes' if prediction == 1 else 'No'}")
        print(f"Confidence: {max(probabilities) * 100:.2f}%")
        
        retry = input("\nDo you want to make another prediction? (y/n): ")
        if retry.lower() != 'y':
            break
            
    except Exception as e:
        print(f"\nError during prediction: {str(e)}")
        retry = input("Do you want to try again? (y/n): ")
        if retry.lower() != 'y':
            break


=== New Prediction ===

Available countries: ['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra', 'Angola', 'Anguilla', 'Antarctica (the territory South of 60 deg S)', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Bouvet Island (Bouvetoya)', 'Brazil', 'British Indian Ocean Territory (Chagos Archipelago)', 'British Virgin Islands', 'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Cayman Islands', 'Central African Republic', 'Chad', 'Chile', 'China', 'Christmas Island', 'Colombia', 'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Eston

In [17]:
# Function to get new data point with proper feature handling
def get_new_data_point():
    """Get new data point from user with proper feature handling"""
    print("\nEnter new data for prediction:")
    
    # Numerical features
    daily_time = float(input("Enter Daily Time Spent on Site: "))
    age = int(input("Enter Age: "))
    area_income = float(input("Enter Area Income: "))
    daily_internet = float(input("Enter Daily Internet Usage: "))
    male = int(input("Enter Male (1 for Male, 0 for Female): "))
    
    # Categorical features
    country = input("Enter Country: ")
    city = input("Enter City: ")
    
    # Create DataFrame with proper column names
    new_data = pd.DataFrame({
        'Daily Time Spent on Site': [daily_time],
        'Age': [age],
        'Area Income': [area_income],
        'Daily Internet Usage': [daily_internet],
        'Male': [male],
        'Country': [country],
        'City': [city]
    })
    
    # Encode categorical variables
    new_data['Country'] = le_country.transform(new_data['Country'])
    new_data['City'] = le_city.transform(new_data['City'])
    
    return new_data

# Example usage
while True:
    try:
        new_data = get_new_data_point()
        
        # Scale the new data
        new_data_scaled = scaler.transform(new_data)
        
        # Make prediction
        prediction = model.predict(new_data_scaled)[0]
        probabilities = model.predict_proba(new_data_scaled)[0]
        
        print("\nPrediction:", "Clicked on Ad" if prediction == 1 else "Did not click on Ad")
        print("Prediction probabilities:", probabilities)
        
        retry = input("\nDo you want to make another prediction? (y/n): ")
        if retry.lower() != 'y':
            break
            
    except ValueError as e:
        print("Invalid input. Please enter numerical values.")
        retry = input("Do you want to start over? (y/n): ")
        if retry.lower() != 'y':
            break
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        retry = input("Do you want to start over? (y/n): ")
        if retry.lower() != 'y':
            break


Enter new data for prediction:
Invalid input. Please enter numerical values.


I made an intential error in entering country where the provision does not provide the numerical value for each country (and also probably city entry too). Fix this with help from Gemini or other AI coding assistant.