<a href="https://colab.research.google.com/github/gulshan0201/DATA-Science/blob/main/ML_EXP_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Synthetic regression problem: 1000 rows, 5 features (all informative), noise=15.
X, y = make_regression(
    n_samples=1000,
    n_features=5,
    n_informative=5,
    noise=15,
    random_state=42,
)
# Keep 20% of the rows aside for scoring; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Model 1: plain linear regression on the raw features.
lr = LinearRegression().fit(X_train, y_train)
lr_r2 = lr.score(X_test, y_test)
print("Linear Regression R² Score:", lr_r2)

# Model 2: cubic polynomial expansion feeding a linear regression.
poly_model = make_pipeline(
    PolynomialFeatures(degree=3),
    LinearRegression(),
).fit(X_train, y_train)
poly_r2 = poly_model.score(X_test, y_test)
print("Polynomial Regression (deg=3) R² Score:", poly_r2)

# Model 3: RBF-kernel SVR. The scaler is fitted on the training rows only and
# then applied to both splits, so no test-set statistics reach the model.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

svr = SVR(kernel='rbf', C=100, gamma=0.1).fit(X_train_scaled, y_train)
svr_r2 = svr.score(X_test_scaled, y_test)
print("SVR (RBF) R² Score:", svr_r2)


Linear Regression R² Score: 0.9442031815195597
Polynomial Regression (deg=3) R² Score: 0.9406100492181844
SVR (RBF) R² Score: 0.9374296718852451


# Based on a real dataset: COMEX Gold Futures (GC contract)

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np

# Compare Linear Regression, degree-2 Polynomial Regression and RBF SVR on the
# gold-futures CSV, predicting `close` from the other bar columns plus a
# numeric timestamp. Each model's R² on a 20% hold-out split is printed.
file_name = "GC_in_15_minute_new.csv"

try:
    # --- Preprocessing ---
    # Drop incomplete rows, parse `datetime`, and derive Unix seconds from it.
    # The epoch subtraction is used instead of `astype('int64') // 10**9`
    # because the raw integer view is only in nanoseconds when the column is
    # datetime64[ns]; subtracting the epoch gives seconds for any resolution
    # and produces the same integers for nanosecond data.
    unix_epoch = pd.Timestamp("1970-01-01", tz="UTC")
    df = (
        pd.read_csv(file_name)
        .dropna()
        .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
        .assign(
            timestamp=lambda frame: (
                # utc=True treats naive values as UTC and converts aware ones,
                # matching what the integer view of the column represents.
                pd.to_datetime(frame['datetime'], utc=True) - unix_epoch
            ) // pd.Timedelta(seconds=1)
        )
    )

    # --- Define features (X) and target (y) ---
    features = ['open', 'high', 'low', 'volume', 'timestamp']
    target = 'close'

    X = df[features]
    y = df[target]

    # --- Split the data ---
    # NOTE(review): train_test_split shuffles by default, so test rows are
    # interleaved in time with training rows. If these are time-ordered bars a
    # chronological split (shuffle=False) would be a stricter evaluation —
    # confirm whether that matters for this experiment.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    print(f"Training with {len(X_train)} samples, Testing with {len(X_test)} samples.")

    # --- Model 1: Linear Regression ---
    print("\n--- Training Linear Regression ---")
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    print("Linear Regression R² Score:", lr.score(X_test, y_test))

    # --- Model 2: Polynomial Regression (degree 2) ---
    print("\n--- Training Polynomial Regression (deg=2) ---")
    poly_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
    poly_model.fit(X_train, y_train)
    print("Polynomial Regression (deg=2) R² Score:", poly_model.score(X_test, y_test))

    # --- Model 3: SVR with RBF Kernel ---
    print("\n--- Training SVR (RBF) ---")
    # SVR requires features to be on a similar scale; the scaler is fitted on
    # the training split only and reused for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Using the same parameters as the example
    svr = SVR(kernel='rbf', C=100, gamma=0.1)
    svr.fit(X_train_scaled, y_train)
    print("SVR (RBF) R² Score:", svr.score(X_test_scaled, y_test))

except FileNotFoundError:
    # A missing data file is an expected condition: report it without a traceback.
    print(f"Error: The file '{file_name}' was not found.")
except Exception as e:
    # Unexpected failures are reported and then re-raised so the traceback is
    # visible and the cell does not appear to succeed while variables from an
    # earlier cell are still live.
    print(f"An error occurred: {e}")
    raise

Training with 5580 samples, Testing with 1395 samples.

--- Training Linear Regression ---
Linear Regression R² Score: 0.9999483527876615

--- Training Polynomial Regression (deg=2) ---
Polynomial Regression (deg=2) R² Score: 0.9999470827116347

--- Training SVR (RBF) ---
SVR (RBF) R² Score: 0.9998079395523923


# Task 2: Varying the polynomial degree and the SVR hyperparameters (C, gamma)

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np

# Task 2: on the same gold-futures CSV, compare a linear baseline against
# polynomial regression at several degrees and RBF SVR at several (C, gamma)
# settings. Each model's R² on a 20% hold-out split is printed.
file_name = "GC_in_15_minute_new.csv"

try:
    # --- Preprocessing ---
    # Drop incomplete rows, parse `datetime`, and derive Unix seconds from it.
    # The epoch subtraction is used instead of `astype('int64') // 10**9`
    # because the raw integer view is only in nanoseconds when the column is
    # datetime64[ns]; subtracting the epoch gives seconds for any resolution
    # and produces the same integers for nanosecond data.
    unix_epoch = pd.Timestamp("1970-01-01", tz="UTC")
    df = (
        pd.read_csv(file_name)
        .dropna()
        .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
        .assign(
            timestamp=lambda frame: (
                # utc=True treats naive values as UTC and converts aware ones,
                # matching what the integer view of the column represents.
                pd.to_datetime(frame['datetime'], utc=True) - unix_epoch
            ) // pd.Timedelta(seconds=1)
        )
    )

    features = ['open', 'high', 'low', 'volume', 'timestamp']
    target = 'close'

    X = df[features]
    y = df[target]

    # --- Split the data ---
    # NOTE(review): train_test_split shuffles by default, so test rows are
    # interleaved in time with training rows. If these are time-ordered bars a
    # chronological split (shuffle=False) would be a stricter evaluation —
    # confirm whether that matters for this experiment.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    print(f"Training with {len(X_train)} samples, Testing with {len(X_test)} samples.\n")

    # --- Model 1: Linear Regression (Baseline) ---
    print("--- Linear Regression ---")
    lr = LinearRegression()
    lr.fit(X_train, y_train)
    print(f"Linear Regression R² Score: {lr.score(X_test, y_test):.6f}\n")

    # --- Model 2: Polynomial Regression (Varying degree) ---
    print("--- Polynomial Regression ---")
    poly_models = {}
    for degree in (2, 3):
        model = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
        model.fit(X_train, y_train)
        poly_models[degree] = model
        print(f"Polynomial Regression (deg={degree}) R² Score: {model.score(X_test, y_test):.6f}")
    print()  # blank line closing the section
    # Keep the per-degree names bound for any code that refers to them.
    poly_model_2, poly_model_3 = poly_models[2], poly_models[3]

    # --- Model 3: SVR with RBF Kernel (Varying C and gamma) ---
    print("--- SVR (RBF) ---")
    # Scaler is fitted on the training split only and reused for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # (C, gamma) settings, in order: original parameters, common defaults,
    # high C with default gamma.
    svr_settings = [(100, 0.1), (1.0, 'scale'), (1000, 'scale')]
    svr_models = []
    for C, gamma in svr_settings:
        model = SVR(kernel='rbf', C=C, gamma=gamma)
        model.fit(X_train_scaled, y_train)
        svr_models.append(model)
        # !r renders 'scale' with quotes and 0.1 without, so each label shows
        # the setting exactly as it is written in svr_settings.
        print(f"SVR (C={C}, gamma={gamma!r}) R² Score: {model.score(X_test_scaled, y_test):.6f}")
    # Keep the per-setting names bound for any code that refers to them.
    svr_1, svr_2, svr_3 = svr_models

except FileNotFoundError:
    # A missing data file is an expected condition: report it without a traceback.
    print(f"Error: The file '{file_name}' was not found.")
except Exception as e:
    # Unexpected failures are reported and then re-raised so the traceback is
    # visible and the cell does not appear to succeed while variables from an
    # earlier cell are still live.
    print(f"An error occurred: {e}")
    raise

Training with 5580 samples, Testing with 1395 samples.

--- Linear Regression ---
Linear Regression R² Score: 0.999948

--- Polynomial Regression ---
Polynomial Regression (deg=2) R² Score: 0.999947
Polynomial Regression (deg=3) R² Score: 0.999947

--- SVR (RBF) ---
SVR (C=100, gamma=0.1) R² Score: 0.999808
SVR (C=1.0, gamma='scale') R² Score: 0.959068
SVR (C=1000, gamma='scale') R² Score: 0.999867
