# Imports Section.

In [30]:
"""Standard library imports."""

import pandas as pd  # Pandas for data manipulation and analysis.
from sklearn.preprocessing import MinMaxScaler   # MinMaxScaler from scikit-learn for feature scaling
from sklearn.model_selection import train_test_split  # train_test_split from scikit-learn for splitting the dataset
from sklearn.linear_model import LinearRegression   # LinearRegression from scikit-learn for linear regression modeling
from sklearn.linear_model import Ridge  # Ridge regression is a linear regression model with L2 regularization
from sklearn.model_selection import cross_val_score  # cross_val_score for cross-validation
from sklearn.feature_selection import RFE  # Recursive Feature Elimination (RFE) for feature selection

# Read CSV


In [2]:
# Read the CSV file into a Pandas DataFrame.
# The read_csv function reads the data from the CSV file and stores it in a tabular format.
# You can specify various parameters like delimiter, header, and encoding based on the file's characteristics.


data = pd.read_csv("/content/data/local.csv")

In [3]:
# Display the first few rows of the 'data' DataFrame.
# The head() method is used to show a preview of the data, which is useful for initial exploration.
# By default, it displays the first 5 rows, but you can specify the number of rows by passing an argument.

data.head()

Unnamed: 0,24K - Local Price/Sell,24K - Local Price/Buy,22K - Local Price/Sell,22K - Local Price/Buy,21K - Local Price/Sell,21K - Local Price/Buy,18K - Local Price/Sell,18K - Local Price/Buy,14K - Local Price/Sell,14K - Local Price/Buy,12K - Local Price/Sell,12K - Local Price/Buy
0,1394.0,1401.0,1278.0,1284.0,1220.0,1226.0,1046.0,1051.0,813.0,817.0,697.0,701.0
1,1398.0,1402.0,1281.0,1285.0,1223.0,1227.0,1048.0,1052.0,815.0,818.0,699.0,701.0
2,1431.0,1435.0,1312.0,1316.0,1252.0,1256.0,1073.0,1077.0,835.0,837.0,715.0,718.0
3,1446.0,1457.0,1325.0,1336.0,1265.0,1275.0,1084.0,1093.0,843.0,850.0,723.0,729.0
4,1429.0,1440.0,1310.0,1320.0,1250.0,1260.0,1071.0,1080.0,833.0,840.0,714.0,720.0




# spliting data


In [4]:
# Extracting sell prices as labels
# iloc[:, [0, 2, 4, 6, 8, 10]] selects all rows (:) and columns at indices 0, 2, 4, 6, 8, 10

labels = data.iloc[:, [0, 2, 4, 6, 8, 10]]
labels.head()

Unnamed: 0,24K - Local Price/Sell,22K - Local Price/Sell,21K - Local Price/Sell,18K - Local Price/Sell,14K - Local Price/Sell,12K - Local Price/Sell
0,1394.0,1278.0,1220.0,1046.0,813.0,697.0
1,1398.0,1281.0,1223.0,1048.0,815.0,699.0
2,1431.0,1312.0,1252.0,1073.0,835.0,715.0
3,1446.0,1325.0,1265.0,1084.0,843.0,723.0
4,1429.0,1310.0,1250.0,1071.0,833.0,714.0


In [5]:
# Extracting buy prices as features
# iloc[:, [1, 3, 5, 7, 9, 11]] selects all rows (:) and columns at indices 1, 3, 5, 7, 9, 11

features = data.iloc[:, [1, 3, 5, 7, 9, 11]]
features.head()

Unnamed: 0,24K - Local Price/Buy,22K - Local Price/Buy,21K - Local Price/Buy,18K - Local Price/Buy,14K - Local Price/Buy,12K - Local Price/Buy
0,1401.0,1284.0,1226.0,1051.0,817.0,701.0
1,1402.0,1285.0,1227.0,1052.0,818.0,701.0
2,1435.0,1316.0,1256.0,1077.0,837.0,718.0
3,1457.0,1336.0,1275.0,1093.0,850.0,729.0
4,1440.0,1320.0,1260.0,1080.0,840.0,720.0


# preprocessing

In [6]:
# Creating a MinMaxScaler instance
scale = MinMaxScaler()

# Transforming the features using Min-Max scaling
# fit_transform() scales the features to a specified range (default is [0, 1])

features = scale.fit_transform(features)
features


array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [5.74052813e-04, 6.26174076e-04, 6.56167979e-04, 7.65696784e-04,
        9.84251969e-04, 0.00000000e+00],
       [1.95177956e-02, 2.00375704e-02, 1.96850394e-02, 1.99081164e-02,
        1.96850394e-02, 1.95402299e-02],
       ...,
       [8.75430540e-01, 8.75391359e-01, 8.75328084e-01, 8.75191424e-01,
        8.75984252e-01, 8.75862069e-01],
       [8.58783008e-01, 8.59110833e-01, 8.58923885e-01, 8.59111792e-01,
        8.59251969e-01, 8.59770115e-01],
       [8.88633754e-01, 8.88541014e-01, 8.88451444e-01, 8.88208270e-01,
        8.88779528e-01, 8.88505747e-01]])

In [7]:
scale2 = MinMaxScaler()

# Transforming the labels using Min-Max scaling
# fit_transform() scales the labels to a specified range (default is [0, 1])

labels = scale2.fit_transform(labels)
labels

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.00236407, 0.00193424, 0.00202703, 0.00157729, 0.00202634,
        0.00236407],
       [0.02186761, 0.02192134, 0.02162162, 0.02129338, 0.02228977,
        0.0212766 ],
       ...,
       [0.87825059, 0.87814313, 0.87837838, 0.8785489 , 0.87841945,
        0.87825059],
       [0.87470449, 0.87491941, 0.875     , 0.87539432, 0.87537994,
        0.87470449],
       [0.90543735, 0.90522244, 0.90540541, 0.90536278, 0.90577508,
        0.90543735]])

In [9]:
# Splitting the dataset into training and testing sets
# features and labels are split into x_train, x_test, y_train, and y_test
# test_size=0.2 indicates that 20% of the data will be used for testing, and 80% for training
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)


In [15]:
x_train

array([[0.61595867, 0.61615529, 0.61614173, 0.61638591, 0.61614173,
        0.61609195],
       [0.65556831, 0.65560426, 0.65551181, 0.65543645, 0.65551181,
        0.65517241],
       [0.61825488, 0.61803381, 0.61811024, 0.6179173 , 0.61811024,
        0.6183908 ],
       ...,
       [0.86854191, 0.86850344, 0.8687664 , 0.86906585, 0.86909449,
        0.86896552],
       [0.61136625, 0.61177207, 0.61154856, 0.61179173, 0.61220472,
        0.61149425],
       [0.82950631, 0.82968065, 0.82939633, 0.82924962, 0.82972441,
        0.82988506]])

In [11]:
x_test

array([[0.37657865, 0.37695679, 0.37664042, 0.37672282, 0.3769685 ,
        0.37701149],
       [0.37657865, 0.37695679, 0.37664042, 0.37672282, 0.3769685 ,
        0.37701149],
       [0.66532721, 0.66562304, 0.66535433, 0.66539051, 0.66535433,
        0.66551724],
       [0.40929966, 0.40951785, 0.40944882, 0.40964778, 0.40944882,
        0.4091954 ],
       [0.65556831, 0.65560426, 0.65551181, 0.65543645, 0.65551181,
        0.65517241],
       [0.70493685, 0.70507201, 0.70472441, 0.70444104, 0.70472441,
        0.7045977 ],
       [0.64580941, 0.64558547, 0.64566929, 0.64548239, 0.64566929,
        0.64597701],
       [0.74397245, 0.7438948 , 0.74409449, 0.74425727, 0.74409449,
        0.74482759],
       [0.63605052, 0.63619286, 0.63582677, 0.63552833, 0.63582677,
        0.63563218],
       [0.64580941, 0.64558547, 0.64566929, 0.64548239, 0.64566929,
        0.64597701],
       [0.36337543, 0.36380714, 0.36351706, 0.36370597, 0.36417323,
        0.36321839],
       [0.31113662, 0

In [12]:
y_train

array([[0.63179669, 0.63185042, 0.63175676, 0.63170347, 0.63221884,
        0.63120567],
       [0.67198582, 0.67182463, 0.6722973 , 0.67271293, 0.67274569,
        0.67257683],
       [0.63356974, 0.63378466, 0.63378378, 0.6340694 , 0.63424519,
        0.63356974],
       ...,
       [0.88475177, 0.88459059, 0.88513514, 0.88564669, 0.88551165,
        0.88534279],
       [0.62825059, 0.62798195, 0.62837838, 0.6285489 , 0.62816616,
        0.62884161],
       [0.83747045, 0.83752418, 0.83783784, 0.83832808, 0.8378926 ,
        0.83806147]])

In [13]:
y_test

array([[0.37825059, 0.37846551, 0.37837838, 0.3785489 , 0.37892604,
        0.37825059],
       [0.37174941, 0.37137331, 0.37162162, 0.3714511 , 0.37183384,
        0.37115839],
       [0.67553191, 0.6756931 , 0.67567568, 0.67586751, 0.67578521,
        0.67612293],
       [0.40543735, 0.40554481, 0.40540541, 0.40536278, 0.40526849,
        0.40543735],
       [0.66548463, 0.66537718, 0.66554054, 0.66561514, 0.6656535 ,
        0.66548463],
       [0.70921986, 0.70921986, 0.70945946, 0.70977918, 0.70921986,
        0.70921986],
       [0.64834515, 0.6486138 , 0.64864865, 0.64905363, 0.64842958,
        0.64893617],
       [0.75      , 0.74983881, 0.75      , 0.75      , 0.74974671,
        0.74940898],
       [0.65189125, 0.65183752, 0.65202703, 0.6522082 , 0.65248227,
        0.65248227],
       [0.65543735, 0.65506125, 0.65540541, 0.65536278, 0.65552178,
        0.65484634],
       [0.35165485, 0.3513862 , 0.35135135, 0.35094637, 0.35157042,
        0.35106383],
       [0.3108747 , 0

In [16]:
print(len(features))

349


In [17]:
print(len(x_train))

279


In [18]:
print(len(x_test))

70


# LinearRegression

In [20]:
model = LinearRegression()
model.fit(x_train,y_train)

In [21]:
# Evaluating the model's performance on the test set
# model.score(x_test, y_test) computes the coefficient of determination (R²)
# to assess how well the model predicts the target variable y_test based on the features x_test

model.score(x_test,y_test)

0.9986909084801693

# Regularization with Ridge Regression

In [23]:
# Create a Ridge regression model with regularization strength (alpha)
ridge_model = Ridge(alpha=0.1)

# Train the model
ridge_model.fit(x_train, y_train)

# Evaluate the model
ridge_score = ridge_model.score(x_test, y_test)
print("Ridge Regression Score:", ridge_score)


Ridge Regression Score: 0.998671985226557


# Cross-Validation

In [27]:
# Perform cross-validation to get a more robust estimate of performance
scores = cross_val_score(LinearRegression(), features, labels, cv=5)
print("Cross-Validation Scores:", scores)


Cross-Validation Scores: [-1.54200437e+18  9.94783317e-01  9.93674068e-01  9.71987347e-01
  9.93305599e-01]


# Feature Selection with Recursive Feature Elimination (RFE)

In [29]:
# Use RFE for feature selection
model = LinearRegression()
rfe = RFE(model, n_features_to_select=6)  # Adjust the number of features as needed
x_train_rfe = rfe.fit_transform(x_train, y_train)
x_test_rfe = rfe.transform(x_test)

# Train and evaluate the model with selected features
model.fit(x_train_rfe, y_train)
rfe_score = model.score(x_test_rfe, y_test)
print("RFE Score:", rfe_score)


RFE Score: 0.9986909084801695
