In [36]:
#Develop the time/memory model using the polynomial modeling approach specified by Mr Arima. 
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
path = r"C:\Users\Aziz\Downloads\B.A.xlsx"
df = pd.read_excel(path, sheet_name="amg", header=0, nrows=21, usecols="A:F")

# Optional: set column names
df.columns = ["nx", "ny", "nz", "Grid size(nx*ny*nz)", "Execution time(cpu clock time)", "FOM_1(figure of mertit)"]
df.drop([0])
print(df.head)
# Print the resulting DataFrame
# Preprocessed data (arg1, arg2, arg3, memory_consumption)
amg1_data = df.to_numpy()
x = amg1_data[:,[0,1,2]] # Extract input parameters (arg1, arg2, arg3)
y = amg1_data[:,4] # Extract memory consumption values
print(x.shape)
print(y.shape)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

<bound method NDFrame.head of      nx   ny   nz  Grid size(nx*ny*nz)  Execution time(cpu clock time)  \
0    10   10   10                 1000                        0.016029   
1    20   20   20                 8000                        0.033401   
2    30   30   30                27000                        0.130201   
3    40   40   40                64000                        0.416461   
4    50   50   50               125000                        0.847139   
5    60   60   60               216000                        1.520439   
6    70   70   70               343000                        2.620656   
7    80   80   80               512000                        4.124697   
8    90   90   90               729000                        5.843543   
9   100  100  100              1000000                        7.603722   
10  100  110  110              1210000                        9.059771   
11  100  120  120              1440000                       12.126158   
12  100 

Next we work on cross validation to see which degree of the polynomial feature is best fit to our data


In [37]:
from sklearn.model_selection import cross_val_score

# fucntion that returns the mean of the cross val scores
def get_cross_val_score_for_degree(degree, X, y):
    poly = PolynomialFeatures(degree)
    X_poly = poly.fit_transform(X)
    model = LinearRegression()
    scores = cross_val_score(model, X_poly, y, cv=5, scoring='neg_mean_squared_error')
    return -np.mean(scores)

In [38]:
#we try polynomial degrees from 1 to 5 (inclusive) and find the degree with the lowest mean squared error.
degrees = list(range(1,6))
cv_scores = [get_cross_val_score_for_degree(degree, X_train, y_train) for degree in degrees]
#the minimum mean squared error value
optimal_degree = degrees[np.argmin(cv_scores)]

- we use the optimal degree to transform the input parameters, train the model, make predictions, and calculate the mean squared error.
- The code should now find the optimal polynomial degree using cross-validation and train a model with that degree. This should result in a more accurate and robust model.

In [39]:
poly = PolynomialFeatures(optimal_degree)
X_train_poly_optimal = poly.fit_transform(X_train)
X_test_poly_optimal = poly.transform(X_test)
model = LinearRegression().fit(X_train_poly_optimal, y_train)
y_pred_optimal = model.predict(X_test_poly_optimal)

mse_optimal = np.mean((y_test - y_pred_optimal) ** 2)
print("Mean squared error (optimal degree): ", mse_optimal)
print("Optimal degree is: ", optimal_degree)

Mean squared error (optimal degree):  0.07195642433230018
Optimal degree is:  2
