<a href="https://colab.research.google.com/github/ashutosh-linux/STML/blob/main/STML_PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression

# Load the dataset
# Specifying the encoding to 'latin-1' to handle the problematic byte
data = pd.read_csv('/content/genshin_characters_v1.csv', encoding='latin-1')
# You might need to experiment with other encodings like 'iso-8859-1', 'cp1252' if 'latin-1' doesn't work

# Display the first few rows to understand structure
print("Dataset Preview:")
print(data.head())

# Preprocess the data
# Drop missing values if any
data = data.dropna()

# Encoding categorical variables
# Applying Label Encoding to object (categorical) columns
for column in data.select_dtypes(include=['object']).columns:
    data[column] = LabelEncoder().fit_transform(data[column])

# Define features and target variable
# Assuming the last column is the target column, adjust as necessary
X = data.iloc[:, :-1]  # All columns except the last
y = data.iloc[:, -1]   # The last column as the target

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardizing the features (necessary for SVM and k-NN)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 1. Support Vector Machine
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
print("SVM Accuracy:", accuracy_score(y_test, svm_predictions))
print("SVM Classification Report:\n", classification_report(y_test, svm_predictions))

# Splitting the data into training and testing sets
# Reducing test_size to 0.2 to have more data in training set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. k-Nearest Neighbors
# Reducing n_neighbors to 3 to be less than or equal to the number of samples in the smallest class
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)
print("k-NN Accuracy:", accuracy_score(y_test, knn_predictions))
print("k-NN Classification Report:\n", classification_report(y_test, knn_predictions))
# 3. Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
print("Decision Tree Accuracy:", accuracy_score(y_test, dt_predictions))
print("Decision Tree Classification Report:\n", classification_report(y_test, dt_predictions))

# 4. Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, rf_predictions))
print("Random Forest Classification Report:\n", classification_report(y_test, rf_predictions))

# 5. Linear Regression (convert predictions to binary for classification)
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)
# Converting predictions to binary labels (0 or 1) based on a threshold of 0.5
lr_predictions = [1 if pred >= 0.5 else 0 for pred in lr_predictions]
print("Linear Regression Accuracy:", accuracy_score(y_test, lr_predictions))
print("Linear Regression Classification Report:\n", classification_report(y_test, lr_predictions))

# 6. Logistic Regression
logistic_model = LogisticRegression(max_iter=200)
logistic_model.fit(X_train, y_train)
logistic_predictions = logistic_model.predict(X_test)
print("Logistic Regression Accuracy:", accuracy_score(y_test, logistic_predictions))
print("Logistic Regression Classification Report:\n", classification_report(y_test, logistic_predictions))


Dataset Preview:
  ï»¿character_name  star_rarity     region  vision arkhe weapon_type  \
0            Albedo            5  Mondstadt     Geo   NaN       Sword   
1         Alhaitham            5     Sumeru  Dendro   NaN       Sword   
2              Aloy            5        NaN    Cryo   NaN         Bow   
3             Amber            4  Mondstadt    Pyro   NaN         Bow   
4      Arataki Itto            5    Inazuma     Geo   NaN    Claymore   

  release_date          model       constellation birthday  ... atk_1_20  \
0   2020-12-23    Medium Male  Princeps Cretaceus   13-Sep  ...       20   
1   2023-01-18      Tall Male       Vultur Volans   11-Feb  ...       24   
2   2021-09-01  Medium Female         Nora Fortis   04-Apr  ...       18   
3   2020-09-28  Medium Female               Lepus   10-Aug  ...       19   
4   2021-12-14      Tall Male    Taurus Iracundus   01-Jun  ...       18   

  def_1_20 ascension_special_stat special_0 special_1 special_2 special_3  \
0       68

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Random Forest Accuracy: 0.3333333333333333
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.50      1.00      0.67         1
           2       0.00      0.00      0.00         1
           3       0.00      0.00      0.00         1

    accuracy                           0.33         3
   macro avg       0.12      0.25      0.17         3
weighted avg       0.17      0.33      0.22         3

Linear Regression Accuracy: 0.0
Linear Regression Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       1.0
           2       0.00      0.00      0.00       1.0
           3       0.00      0.00      0.00       1.0

    accuracy                           0.00       3.0
   macro avg       0.00      0.00      0.00       3.0
weighted avg       0.00      0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://sc