In [9]:
import numpy as np
import pandas as pd

# Load training data
#train_data = pd.read_csv('/content/train.csv')
#test_data = pd.read_csv('/content/test.csv')

# load data training for combined cycle power plant dataset.
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')








# Check dataset structure
print(train_data.head())  # View first few rows
print(test_data.head())

     # T      V       AP     RH      EP
0   8.58  38.38  1021.03  84.37  482.26
1  21.79  58.20  1017.21  66.74  446.94
2  16.64  48.92  1011.55  78.76  452.56
3  31.38  71.32  1009.17  60.42  433.44
4   9.20  40.03  1017.05  92.46  480.38
   11.95000000  42.03000000  1017.58000000  90.89000000
0        12.07        38.25        1012.67        81.66
1        26.91        74.99        1005.64        78.98
2        20.58        39.53        1005.68        62.09
3        16.78        37.20        1011.97        68.94
4        22.46        58.49        1011.50        70.54


In [10]:
# Assuming last column is target (Y), and rest are features (X)
X_train = train_data.iloc[:, :-1].values  # Features
Y_train = train_data.iloc[:, -1].values   # Target

In [11]:
X_test = test_data.values  # Only features



from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
def compute_cost(X, Y, theta):
    m = len(Y)
    predictions = X.dot(theta)
    cost = (1 / (2 * m)) * np.sum((predictions - Y) ** 2)
    return cost

def gradient_descent(X, Y, theta, learning_rate, iterations):
    m = len(Y)
    cost_history = []

    for i in range(iterations):
        predictions = X.dot(theta)
        errors = predictions - Y
        gradient = (1 / m) * X.T.dot(errors)
        theta -= learning_rate * gradient
        cost = compute_cost(X, Y, theta)
        cost_history.append(cost)

        if i % 100 == 0:  # Print every 100 iterations
            print(f"Iteration {i}: Cost = {cost}")

    return theta, cost_history

In [13]:
# Add bias term (column of ones) to X_train and X_test
X_train = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test = np.c_[np.ones((X_test.shape[0], 1)), X_test]

In [14]:
theta = np.zeros(X_train.shape[1])  # Initialize parameters
learning_rate = 0.01  # Experiment with different values
iterations = 1000

theta_optimal, cost_history = gradient_descent(X_train, Y_train, theta, learning_rate, iterations)
print("Optimized Parameters:", theta_optimal)

Iteration 0: Cost = 101339.89001458406
Iteration 100: Cost = 13576.883420542841
Iteration 200: Cost = 1831.484723889703
Iteration 300: Cost = 257.06670885306625
Iteration 400: Cost = 45.56595173863986
Iteration 500: Cost = 16.822308099838896
Iteration 600: Cost = 12.656414403159596
Iteration 700: Cost = 11.847140567327965
Iteration 800: Cost = 11.535757179901712
Iteration 900: Cost = 11.329109705044294
Optimized Parameters: [454.41167483 -12.01979058  -5.01258056   0.8990113   -1.39320091]


In [15]:
Y_pred = X_test.dot(theta_optimal)
print("Predictions on Test Data:", Y_pred)

Predictions on Test Data: [471.98499483 432.87370626 458.6080363  ... 439.76459693 453.58434192
 448.37047796]


In [17]:
submission = pd.DataFrame(Y_pred, columns=["Predicted"])
submission.to_csv("predictions.csv", index=False)
