# Polynomial Regression from Scratch

In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

class PolynomialRegression:
    """Polynomial regression fitted by the normal equation or gradient descent.

    Each input column is expanded into its element-wise powers
    ``1..degree`` (no cross terms), and a linear model is fitted on the
    expanded features. No intercept (bias) column is added; include a
    constant column in ``X`` if an intercept is needed.

    Args:
        degree: Highest power of each feature to generate.
        method: ``'normal_equation'`` or ``'gradient_descent'``.
        lr: Learning rate, used only by gradient descent.
        iterations: Number of gradient-descent steps.
    """

    def __init__(self, degree=2, method='normal_equation', lr=0.001, iterations=10000):
        self.degree = degree
        self.method = method
        self.lr = lr
        self.iterations = iterations
        # Set by fit(); stays None if the normal equation hits a singular system.
        self.weights = None

    def create_polynomial_features(self, X):
        """Return ``X`` concatenated column-wise with ``X**2 .. X**degree``.

        Args:
            X: pandas DataFrame/Series, or any array-like convertible to a
                DataFrame, holding numeric features.

        Returns:
            A float DataFrame with ``degree`` times as many columns as ``X``
            (column labels repeat once per power).
        """
        if not isinstance(X, (pd.DataFrame, pd.Series)):
            X = pd.DataFrame(X)
        # Work in floating point: integer columns raised to a power would
        # otherwise wrap around silently once they exceed the int64 range.
        X = X.astype(float)
        poly_features = [X] + [X.pow(d) for d in range(2, self.degree + 1)]
        return pd.concat(poly_features, axis=1)

    def normal_equation(self, X, y):
        """Solve ``(X^T X) theta = X^T y`` for ``theta``.

        Returns:
            The weight array, or ``None`` when ``X^T X`` is singular and
            ``numpy.linalg.solve`` raises ``LinAlgError``.
        """
        X_arr = np.asarray(X, dtype=float)
        y_arr = np.asarray(y, dtype=float)
        X_transpose_X = X_arr.T @ X_arr
        X_transpose_y = X_arr.T @ y_arr

        try:
            return np.linalg.solve(X_transpose_X, X_transpose_y)
        except np.linalg.LinAlgError:
            return None

    def gradient_descent(self, X, y):
        """Minimise the mean squared error with batch gradient descent.

        Starts from all-zero weights and performs ``self.iterations`` updates
        ``w -= lr * X^T (X w - y) / m``.

        NOTE(review): high-degree features on unscaled data will likely make
        the updates diverge; scale the inputs or lower ``lr`` in that case.

        Raises:
            ValueError: If ``X`` has no rows.
        """
        X_arr = np.asarray(X, dtype=float)
        y_arr = np.asarray(y, dtype=float)
        m = X_arr.shape[0]
        if m == 0:
            raise ValueError("Cannot run gradient descent on an empty dataset")

        # Weights mirror y's trailing shape so both 1-D targets and
        # (m, k) column targets are supported.
        weights = np.zeros((X_arr.shape[1],) + y_arr.shape[1:])
        for _ in range(self.iterations):
            errors = X_arr @ weights - y_arr
            gradient = X_arr.T @ errors / m
            weights -= self.lr * gradient
        return weights

    def fit(self, X, y):
        """Expand ``X`` into polynomial features and learn ``self.weights``.

        Raises:
            ValueError: If ``self.method`` is not a supported solver name.
        """
        X_poly = self.create_polynomial_features(X)
        if self.method == 'normal_equation':
            self.weights = self.normal_equation(X_poly, y)
        elif self.method == 'gradient_descent':
            self.weights = self.gradient_descent(X_poly, y)
        else:
            raise ValueError("Method must be 'normal_equation' or 'gradient_descent'")

    def predict(self, X):
        """Return predictions for ``X`` using the fitted weights.

        Raises:
            RuntimeError: If no weights are available, either because
                ``fit`` was not called or because the normal equation was
                singular.
        """
        if self.weights is None:
            raise RuntimeError(
                "Model has no weights: call fit() first "
                "(the normal equation yields none for a singular system)"
            )
        X_poly = self.create_polynomial_features(X)
        return np.dot(X_poly, self.weights)

# Load the preprocessed dataset.
data = pd.read_csv('processed_dataset.csv')

# Convert the categorical 'city' column to integer labels.
le = LabelEncoder()
data['city'] = le.fit_transform(data['city'])

# Features: every column except the target and the other non-feature columns.
X = data.drop(
    columns=['opened_inside_year', 'carried_over_from', 'finished', 'carried_over_to']
)

# Target variable.
y = data['opened_inside_year']

# Hold out the first 81 rows (noted in the original as the year 2021) for
# testing and train on the remaining rows.
X_test, y_test = X.iloc[:81], y.iloc[:81]
X_train, y_train = X.iloc[81:], y.iloc[81:]

# Build and train the model with the normal equation.
model_ne = PolynomialRegression(degree=4, method='normal_equation')
model_ne.fit(X_train, y_train)

# Predictions on the held-out rows.
y_pred_ne = model_ne.predict(X_test)

# Show predicted vs. real target values for the first 10 test rows.
for row in range(10):
    print("Predicted:", y_pred_ne[row], "\tReal:", y_test.iloc[row])

# Absolute percentage error of each prediction.
absolute_percentage_errors = ((y_test - y_pred_ne) / y_test).abs()

# Mean absolute percentage error (MAPE).
mape = absolute_percentage_errors.mean()

# Report accuracy as 1 - MAPE, expressed as a percentage.
accuracy = 1 - mape
percentage_accuracy = accuracy * 100

print("Average Percentage Accuracy:", percentage_accuracy)

Predicted: 859529.1472389 	Real: 841644
Predicted: 62475.57063326441 	Real: 67719
Predicted: 23364.784083362305 	Real: 25475
Predicted: 16865.098660285636 	Real: 18984
Predicted: 68336.60443502475 	Real: 73542
Predicted: 28765.877648908427 	Real: 33447
Predicted: 274802.22452111426 	Real: 305541
Predicted: 82102.54144312371 	Real: 87538
Predicted: 53319.93485401559 	Real: 59477
Predicted: 68791.53297499056 	Real: 83130
Average Percentage Accuracy: 87.0214832928066
