In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pandas as pd


In [None]:
# Read the workbook into a DataFrame, then report how many rows and columns it has.
original_data = pd.read_excel('/content/OriginalData.xlsx')
rows = original_data.shape[0]
columns = original_data.shape[1]
print(f"Number of rows: {rows}", f"Number of columns: {columns}", sep="\n")

Number of rows: 77
Number of columns: 16


In [None]:
# Predictors are the eleven columns listed below; the response is 'rating'.
feature_columns = [
    'calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo',
    'sugars', 'potass', 'vitamins', 'weight', 'cups',
]
X = original_data[feature_columns]
y = original_data['rating']

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# Adjusted R-square calculations
def adjusted_r2(r2, n, p):
    """Return the adjusted R-squared for a model with ``p`` predictors.

    Computes ``1 - (1 - r2) * (n - 1) / (n - p - 1)``.

    Parameters
    ----------
    r2 : float
        Ordinary R-squared of the fitted model.
    n : int
        Number of observations the R-squared was computed on.
    p : int
        Number of predictors used by the model.

    Returns
    -------
    float
        The adjusted R-squared.

    Raises
    ------
    ValueError
        If ``n - p - 1`` is not positive. The formula divides by that
        quantity, so it is undefined at zero and meaningless when negative.
    """
    residual_dof = n - p - 1
    if residual_dof <= 0:
        raise ValueError(
            f"adjusted R-squared is undefined for n={n}, p={p}: "
            "need n - p - 1 > 0"
        )
    return 1 - (1 - r2) * ((n - 1) / residual_dof)

# Backward elimination: start from every feature and greedily drop the one
# whose removal gives the highest adjusted R^2, for as long as that beats the
# best score seen so far.
# NOTE(review): candidates are scored on the held-out test split, so the test
# data drives the selection and the reported score is optimistic. Consider a
# separate validation split or cross-validation on the training data.
remaining_features = list(X.columns)

# Score the full model first so that "keep every feature" is a possible
# outcome; without this baseline the first pass always discards a feature.
baseline_model = LinearRegression()
baseline_model.fit(X_train[remaining_features], y_train)
best_features = remaining_features[:]
highest_adj_r2 = adjusted_r2(
    baseline_model.score(X_test[remaining_features], y_test),
    len(y_test),
    len(remaining_features),
)

# Keep at least one feature: fitting on an empty column selection is rejected
# by scikit-learn, so there is nothing to evaluate once one feature is left.
while len(remaining_features) > 1:
    feature_to_remove = None

    # Track adjusted R^2 for each feature removal option
    for feature in remaining_features:
        selected_features = [f for f in remaining_features if f != feature]
        X_train_subset = X_train[selected_features]
        X_test_subset = X_test[selected_features]

        # Train the model
        model = LinearRegression()
        model.fit(X_train_subset, y_train)

        # Calculate R-square on the test set
        r2 = model.score(X_test_subset, y_test)

        # Calculate adjusted R2
        adj_r2 = adjusted_r2(r2, len(y_test), len(selected_features))

        # Only a strict improvement over the best score so far counts.
        if adj_r2 > highest_adj_r2:
            highest_adj_r2 = adj_r2
            feature_to_remove = feature
            best_features = selected_features

    # Stop when no single removal improves on the best adjusted R^2 so far
    if feature_to_remove is None:
        break

    # Remove feature
    remaining_features.remove(feature_to_remove)

print("Best Feature Subset:", best_features)
print("Highest Adjusted R-squared:", highest_adj_r2)


Best Feature Subset: ['calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo', 'sugars', 'potass', 'vitamins']
Highest Adjusted R-squared: 0.9999999999999987
