In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.exceptions import UndefinedMetricWarning
# OR Suppress only specific warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Load your dataset
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Periodic Table of Elements Goodman Sciences Github.csv', usecols=['AtomicMass', 'NumberofNeutrons', 'NumberofProtons', 'NumberofElectrons', 'Period', 'Group', 'Phase'])

# If 'Phase' is textual, encode it to numeric
if data['Phase'].dtype == 'object':
    le = LabelEncoder()
    data['Phase'] = le.fit_transform(data['Phase'])

# Drop rows with NaN values in the specified columns
data = data.dropna(subset=['AtomicMass', 'NumberofNeutrons', 'NumberofProtons', 'NumberofElectrons', 'Period', 'Group', 'Phase'])

# Separate features and target variable
X = data[['AtomicMass', 'NumberofNeutrons', 'NumberofProtons', 'NumberofElectrons', 'Period', 'Group']]
y = data['Phase']

# Split the dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Feature Scaling: Standardize features by removing the mean and scaling to unit variance
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Training the original Softmax Regression model
classifier_original = LogisticRegression(multi_class='multinomial', solver='sag', random_state=0, verbose=0, max_iter=10000)
classifier_original.fit(X_train, y_train)

# Creating a copy of the training data and scaling the 'Group' column by a factor of 10
X_train_scaled = X_train.copy()
X_train_scaled[:, list(X.columns).index('Group')] *= 10

# Training the new Softmax Regression model with the scaled 'Group'
classifier_scaled = LogisticRegression(multi_class='multinomial', solver='sag', random_state=0, verbose=0, max_iter=10000)
classifier_scaled.fit(X_train_scaled, y_train)

# Predicting the Test set results for the original model
y_pred_original = classifier_original.predict(X_test)

# Evaluate the original model
print("Original Model Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred_original))
print("\nClassification Report:\n", classification_report(y_test, y_pred_original))

# Predicting the Test set results for the scaled model
y_pred_scaled = classifier_scaled.predict(sc.transform(X_test))  # Scaling X_test before predictions

# Evaluate the scaled model
print("\nScaled 'Group' Model Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred_scaled))
print("\nClassification Report:\n", classification_report(y_test, y_pred_scaled))

# Comparing convergence
print(f"\nOriginal model took {classifier_original.n_iter_[0]} iterations to converge.")
print(f"Scaled 'Group' model took {classifier_scaled.n_iter_[0]} iterations to converge.")
print("Therefore, scaling Group I reduces the convergence rate.")



Original Model Performance:
Accuracy: 0.8888888888888888

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      0.50      0.67         4
           3       0.86      1.00      0.92        12

    accuracy                           0.89        18
   macro avg       0.95      0.83      0.86        18
weighted avg       0.90      0.89      0.87        18


Scaled 'Group' Model Performance:
Accuracy: 0.2222222222222222

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.25      1.00      0.40         4
           3       0.00      0.00      0.00        12

    accuracy                           0.22        18
   macro avg       0.08      0.33      0.13        18
weighted avg       0.06      0.22      0.09        18


Original model took 29 iterations to converge.
Scaled 'Group' mode