In [5]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load the dataset into a pandas DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs where this exact file exists -- consider a configurable data directory.
df = pd.read_csv("C:\\Users\\Ankit Rai\\Videos\\project_prediction\\balanced_data.csv")

# Inputs are the nine 'Chemical*' columns; targets are the four 'Mechanical*'
# columns plus 'Microstructure' (multi-output regression).
X = df[['Chemical', 'Chemical.1', 'Chemical.2', 'Chemical.3', 'Chemical.4', 'Chemical.5', 'Chemical.6', 'Chemical.7','Chemical.8']]
Y = df[['Mechanical', 'Mechanical.1', 'Mechanical.2', 'Mechanical.3','Microstructure']]

# Hold out 20% of the rows for the final evaluation (fixed seed for repeatability).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

# Standardize the inputs for the final model: the scaler is fitted on the
# training rows only and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Candidate neighbour counts to evaluate.
k_values = list(range(1, 25))  # Adjust the range as needed

# Hyperparameters to search, expressed in terms of the k-NN estimator itself.
param_grid = {'n_neighbors': k_values}

# Base k-NN regression model (GridSearchCV clones it, so it stays unfitted).
knn_model = KNeighborsRegressor()

# Cross-validate a scaler + k-NN pipeline on the *unscaled* training data.
# Searching on X_train_scaled would let every validation fold see scaling
# statistics computed from its own rows (preprocessing leakage); inside a
# pipeline the scaler is re-fitted on the training part of each fold only.
search_pipeline = Pipeline([('scaler', StandardScaler()), ('knn', knn_model)])
pipeline_param_grid = {f'knn__{name}': values for name, values in param_grid.items()}
# NOTE(review): with several targets the MSE score is averaged uniformly over
# outputs, so the target with the largest numeric scale dominates the choice
# of k -- consider scaling the targets or using a per-target metric.
grid_search = GridSearchCV(search_pipeline, pipeline_param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, Y_train)

# Best neighbour count found by the search.
# NOTE(review): if this equals max(k_values) the optimum may lie outside the
# searched range; widen k_values and re-run in that case.
best_k = grid_search.best_params_['knn__n_neighbors']

# Train the final model with the best k on the full (scaled) training set.
best_knn_model = KNeighborsRegressor(n_neighbors=best_k)
best_knn_model.fit(X_train_scaled, Y_train)

# Predict the held-out test rows.
Y_pred = best_knn_model.predict(X_test_scaled)

# Overall test error (averaged over all target columns).
mse = mean_squared_error(Y_test, Y_pred)
print("Best k:", best_k)
print("Mean Squared Error:", mse)

Best k: 24
Mean Squared Error: 109.62993871453325


In [6]:
from sklearn.metrics import mean_squared_error

# Break the test error down per target: column i of Y_pred is compared with
# the i-th column of Y_test (the variables produced by the modelling cell).
# Note: `mse` is reassigned on every pass, so after this cell it holds the MSE
# of the last column, not the overall value computed earlier.
for i, column in enumerate(Y_test.columns):
    mse = mean_squared_error(
        y_true=Y_test[column],
        y_pred=Y_pred[:, i],
    )
    print(f"MSE for output {column}: {mse}")


MSE for output Mechanical: 31.809655268615153
MSE for output Mechanical.1: 487.5559959289815
MSE for output Mechanical.2: 28.65754530850039
MSE for output Mechanical.3: 0.12649706656925866
MSE for output Microstructure: 4.543838814073028e-28


In [7]:
# Wrap the prediction array in a DataFrame; each column gets a "_pred"
# suffixed label, listed in the same order as the columns of Y_pred.
columns = [
    'Mechanical_pred',
    'Mechanical.1_pred',
    'Mechanical.2_pred',
    'Mechanical.3_pred',
    'Microstructure_pred',
]
data = Y_pred
df2 = pd.DataFrame(data=data, columns=columns)

In [8]:
# Copy the held-out targets with a fresh 0..n-1 index (the old index labels
# are discarded); Y_test itself is left untouched. Despite its name,
# input_df holds the actual target values, not model inputs.
input_df = Y_test.reset_index(drop=True)
input_df

Unnamed: 0,Mechanical,Mechanical.1,Mechanical.2,Mechanical.3,Microstructure
0,234.0,685.0,506.0,8.59,91.0
1,216.818802,683.012456,505.305928,8.060615,91.0
2,219.0,685.0,507.0,7.69,91.0
3,226.0,715.0,516.0,8.64,91.0
4,226.256858,718.583756,514.634035,8.245829,91.0
5,227.922961,754.896051,522.937356,7.705785,91.0
6,231.0,709.0,512.0,8.24,91.0
7,225.232485,714.585415,515.568463,8.44921,91.0


In [9]:
# Put the predictions (df2) and the actual targets (input_df) side by side;
# column-wise concat aligns the rows on their index labels.
merged_df = pd.concat(objs=[df2, input_df], axis="columns")

In [10]:
# Display the combined frame built from df2 and input_df for inspection.
merged_df

Unnamed: 0,Mechanical_pred,Mechanical.1_pred,Mechanical.2_pred,Mechanical.3_pred,Microstructure_pred,Mechanical,Mechanical.1,Mechanical.2,Mechanical.3,Microstructure
0,224.421415,709.236167,511.972691,8.140139,91.0,234.0,685.0,506.0,8.59,91.0
1,224.421415,709.236167,511.972691,8.140139,91.0,216.818802,683.012456,505.305928,8.060615,91.0
2,224.042564,708.024458,512.02294,8.174879,91.0,219.0,685.0,507.0,7.69,91.0
3,223.809996,708.174192,512.011219,8.131038,91.0,226.0,715.0,516.0,8.64,91.0
4,224.421415,709.236167,511.972691,8.140139,91.0,226.256858,718.583756,514.634035,8.245829,91.0
5,222.616276,710.973661,513.400432,8.113809,91.0,227.922961,754.896051,522.937356,7.705785,91.0
6,224.788872,708.093307,511.320549,8.151146,91.0,231.0,709.0,512.0,8.24,91.0
7,223.088473,708.992558,512.485615,8.097673,91.0,225.232485,714.585415,515.568463,8.44921,91.0
