In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt





In [2]:
df = pd.read_csv('transformed_df.csv')
df.head()


Unnamed: 0,Question,Number of Sources,Trust Score,chatGPT,copilot,gemini,llama,perplexity,Economics & Commerce,Entertainment,...,y20india.in,yahoo.com,yale.edu,yaleclimateconnections.org,yellowbrick.co,yourdictionary.com,yourstory.com,youtube.com,zavvi.com,zerohanger.com
0,"""where does the saying keeping up with the jon...",2,10,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"""when did day light savings start in the us""",1,9,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"""what is the doll in the garden about""",3,11,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,"""where is a unitary system of government found""",3,11,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"""who dies in season 2 of the originals""",1,9,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [3]:
df.drop(['Question', 'Unnamed: 24'], axis=1, inplace=True)
display(df.head())
df.shape

Unnamed: 0,Number of Sources,Trust Score,chatGPT,copilot,gemini,llama,perplexity,Economics & Commerce,Entertainment,Food & Cooking,...,y20india.in,yahoo.com,yale.edu,yaleclimateconnections.org,yellowbrick.co,yourdictionary.com,yourstory.com,youtube.com,zavvi.com,zerohanger.com
0,2,10,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,9,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,11,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,3,11,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,9,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


(500, 968)

In [5]:
X = df.drop('Trust Score', axis=1)
y = df['Trust Score']

kf = KFold(n_splits=5, shuffle=True, random_state=42)
mae_scores = []

for train_index, val_index in kf.split(X):
    # Train-Test Split
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]
    
    # Feature Scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    
    # Model Definition
    model = Sequential()

    # Input Layer
    model.add(Input(shape=(X_train.shape[1],)))

    # Hidden Layers
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.3))

    # Output Layer
    model.add(Dense(1, activation='linear'))

    # Compile the Model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

    # Early Stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Train the Model
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),  # Use X_val, y_val for validation
        epochs=100,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluate the Model
    loss, mae = model.evaluate(X_val, y_val, verbose=0)
    print(f'Mean Absolute Error for fold: {mae:.2f}')
    
    # Append MAE for this fold
    mae_scores.append(mae)

# Final Mean MAE across all folds
print(f'Mean MAE across folds: {np.mean(mae_scores):.2f}')
# Print the lowest MAE from the KFold cross-validation
lowest_mae = min(mae_scores)
print(f'Lowest MAE from KFold cross-validation: {lowest_mae:.2f}')


Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 122.8518 - mae: 9.1920 - val_loss: 71.8985 - val_mae: 7.7425
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 37.8588 - mae: 5.3951 - val_loss: 21.4311 - val_mae: 3.9163
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15.0674 - mae: 3.1008 - val_loss: 16.6562 - val_mae: 3.4698
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 10.3207 - mae: 2.5451 - val_loss: 20.3649 - val_mae: 3.7560
Epoch 5/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 8.7179 - mae: 2.2689 - val_loss: 17.5422 - val_mae: 3.5078
Epoch 6/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 7.4592 - mae: 2.1543 - val_loss: 16.1226 - val_mae: 3.3665
Epoch 7/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/ste