In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
cleaned_stock_data = pd.read_csv('/content/cleaned_stock_data.csv')
cleaned_stock_data.head()

Unnamed: 0,date,open_value,high_value,low_value,last_value,change_prev_close_percentage,turnover
0,2015-12-30,1689.22,1689.71,1673.62,1689.63,-0.02,2017520.82
1,2015-12-29,1675.79,1691.02,1673.37,1689.94,0.84,1094356.06
2,2015-12-28,1655.92,1677.17,1652.76,1675.88,1.21,1125687.29
3,2015-12-23,1647.66,1655.77,1641.41,1655.77,0.49,592284.75
4,2015-12-22,1655.71,1655.71,1642.6,1647.67,-0.55,2714509.05


In [4]:
# Convert 'date' to datetime format
cleaned_stock_data['date'] = pd.to_datetime(cleaned_stock_data['date'])

In [5]:
# Feauture Engineering
def feature_engineering(df):
    try:
        df['year'] = df['date'].dt.year
        df['month'] = df['date'].dt.month
        df['day'] = df['date'].dt.day
        print("Feature engineering completed.")
        return df
    except Exception as e:
        print(f"Error in feature engineering: {e}")
        return df


In [6]:
# Feature Scaleing
def scale_features(df, features):
    try:
        scaler = MinMaxScaler()
        df[features] = scaler.fit_transform(df[features])
        print("Features scaled successfully.")
        return df
    except Exception as e:
        print(f"Error in scaling features: {e}")
        return df

In [10]:
cleaned_stock_data.drop(columns=['date']) # Droping 'date' column fron the dataframe

# Applying "feature_engineering" function
cleaned_stock_data = feature_engineering(cleaned_stock_data)

# Applying "scale_features" function
numeric_columns = cleaned_stock_data.select_dtypes(include=['number']).columns.tolist()
scaled_stock_data = scale_features(cleaned_stock_data, numeric_columns)

scaled_stock_data

Feature engineering completed.
Features scaled successfully.


Unnamed: 0,date,open_value,high_value,low_value,last_value,change_prev_close_percentage,turnover,year,month,day
0,2015-12-30,0.097132,0.094058,0.085668,0.098375,0.341176,0.065958,1.0,1.0,0.966667
1,2015-12-29,0.078355,0.095888,0.085317,0.098809,0.404412,0.035777,1.0,1.0,0.933333
2,2015-12-28,0.050573,0.076543,0.056339,0.079128,0.431618,0.036801,1.0,1.0,0.900000
3,2015-12-23,0.039024,0.046652,0.040381,0.050979,0.378676,0.019363,1.0,1.0,0.733333
4,2015-12-22,0.050279,0.046568,0.042054,0.039641,0.302206,0.088744,1.0,1.0,0.700000
...,...,...,...,...,...,...,...,...,...,...
1493,2010-01-11,0.643979,0.689457,0.657495,0.689828,0.475000,0.106085,0.0,0.0,0.333333
1494,2010-01-08,0.631171,0.631924,0.632116,0.637505,0.395588,0.087079,0.0,0.0,0.233333
1495,2010-01-07,0.612813,0.633140,0.609409,0.616663,0.375735,0.077305,0.0,0.0,0.200000
1496,2010-01-05,0.566365,0.627748,0.577141,0.603771,0.441912,0.138485,0.0,0.0,0.133333


In [11]:
# Save preprocessed data
scaled_stock_data.to_csv('preprocessed_stock_data.csv', index=False)