In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Train and evaluate a random-forest classifier that predicts the
# 'Degerlendirme Puani' column from four categorical columns of train.csv.

# Column names are defined once so the cleaning, feature selection and
# encoding steps cannot drift apart.
features = ['Dogum Yeri', 'Ikametgah Sehri', 'Universite Adi', 'Universite Turu']
target = 'Degerlendirme Puani'
RANDOM_STATE = 42  # shared seed for the split and the forest (reproducibility)

# Step 1: Load the Data
train_df = pd.read_csv("train.csv")

# Step 2: Handle Missing Values
# Drop every row that lacks the target or any of the selected features.
# A single call is equivalent to dropping on the target first and on the
# features afterwards, because dropna removes a row when ANY listed column
# is missing.
train_df = train_df.dropna(subset=[target, *features])

# Step 3: Preprocess the Data
X = train_df[features]
y = train_df[target]

# One-hot encode the categorical features. handle_unknown='ignore' makes
# categories that were not seen during fit encode as all-zero rows instead of
# raising at predict time.
# remainder='passthrough' is a no-op here: X holds only the columns listed in
# `features`, so nothing is left over to pass through.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), features),
    ],
    remainder='passthrough',
)

# Create a pipeline that first transforms the data and then fits the model
# NOTE(review): the target looks like a numeric score with many distinct
# values, and the recorded run of this cell reports roughly 4% accuracy.
# Treating it as a regression target may be a better fit -- confirm with the
# task definition before changing the estimator.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)),
])

# Split the data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Step 4: Train a Classification Model
pipeline.fit(X_train, y_train)

# Step 5: Evaluate the Model
y_pred = pipeline.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
# zero_division=0 reports 0.0 for classes that are never predicted (the same
# value the default yields) without emitting an UndefinedMetricWarning for
# each affected metric.
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# For predicting on new data (test_x.csv), use similar preprocessing


  train_df = pd.read_csv("train.csv")


Accuracy: 0.03939223410241981
Classification Report:
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         4
         1.0       0.00      0.00      0.00         4
         2.0       0.03      0.02      0.03        42
         3.0       0.03      0.03      0.03        36
         4.0       0.14      0.03      0.05        31
         5.0       0.02      0.02      0.02       114
         6.0       0.06      0.07      0.06       135
         7.0       0.07      0.08      0.07       308
         8.0       0.04      0.05      0.05       283
         9.0       0.05      0.07      0.06       162
        10.0       0.04      0.04      0.04       167
        11.0       0.04      0.05      0.05       135
        12.0       0.04      0.06      0.05       327
        13.0       0.03      0.03      0.03       227
        14.0       0.04      0.04      0.04       309
        15.0       0.05      0.05      0.05       232
        16.0       0.03    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
