<a href="https://colab.research.google.com/github/Siddhi2156/mlda/blob/main/Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load dataset
data = pd.read_csv("vgsales.csv")
print("First 5 rows:\n", data.head(), "\n")

# Step 2: Drop rows with missing Year
data = data.dropna(subset=['Year'])

# Step 3: Create binary target (high sales or not)
median_sales = data['Global_Sales'].median()
data['high_sales'] = (data['Global_Sales'] > median_sales).astype(int)
print("Median Global Sales:", median_sales, "\n")

# Step 4: Select features and make a COPY to avoid warnings
features = ['Platform', 'Year', 'Genre', 'Publisher']
X = data[features].copy()
y = data['high_sales']

# Step 5: Encode categorical columns with LabelEncoder
le = LabelEncoder()
for col in ['Platform', 'Genre', 'Publisher']:
    X.loc[:, col] = le.fit_transform(X[col])

# Step 6: Scale numeric feature 'Year'
scaler = StandardScaler()
X.loc[:, 'Year'] = scaler.fit_transform(X[['Year']])

# Step 7: Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 8: Logistic Regression model
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

# Step 9: Evaluation
y_pred = lr.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

First 5 rows:
    Rank                      Name Platform    Year         Genre Publisher  \
0     1                Wii Sports      Wii  2006.0        Sports  Nintendo   
1     2         Super Mario Bros.      NES  1985.0      Platform  Nintendo   
2     3            Mario Kart Wii      Wii  2008.0        Racing  Nintendo   
3     4         Wii Sports Resort      Wii  2009.0        Sports  Nintendo   
4     5  Pokemon Red/Pokemon Blue       GB  1996.0  Role-Playing  Nintendo   

   NA_Sales  EU_Sales  JP_Sales  Other_Sales  Global_Sales  
0     41.49     29.02      3.77         8.46         82.74  
1     29.08      3.58      6.81         0.77         40.24  
2     15.85     12.88      3.79         3.31         35.82  
3     15.75     11.01      3.28         2.96         33.00  
4     11.27      8.89     10.22         1.00         31.37   

Median Global Sales: 0.17 

Accuracy: 0.5688916105327618

Classification Report:
               precision    recall  f1-score   support

           