# EUR/INR Currency Trend Prediction

### Importing Necessary Libraries

In [18]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Reading the Data

In [3]:
# Load the EUR/INR quotes from the CSV file into a DataFrame and show it.
csv_path = 'EUR_INR_data.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,12/29/2023,91.869,91.9765,92.2030,91.8570,,-0.12%
1,12/28/2023,91.977,92.4690,92.6650,91.9360,,-0.51%
2,12/27/2023,92.446,91.8370,92.6360,91.7380,,0.68%
3,12/26/2023,91.823,91.5610,91.8690,91.5565,,0.31%
4,12/25/2023,91.539,91.6520,91.6770,91.4565,,-0.03%
...,...,...,...,...,...,...,...
255,01/06/2023,87.568,86.9070,87.8640,86.7110,101.63K,0.81%
256,01/05/2023,86.862,87.6600,87.9105,86.8285,87.73K,-0.83%
257,01/04/2023,87.591,87.3155,88.0430,87.2535,96.93K,0.30%
258,01/03/2023,87.327,88.2645,88.4380,87.2845,100.98K,-1.02%


### Data Exploration

In [4]:
# Summarise column names, non-null counts and dtypes of the freshly loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      260 non-null    object 
 1   Price     260 non-null    float64
 2   Open      260 non-null    float64
 3   High      260 non-null    float64
 4   Low       260 non-null    float64
 5   Vol.      18 non-null     object 
 6   Change %  260 non-null    object 
dtypes: float64(4), object(3)
memory usage: 14.3+ KB


In [5]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,Price,Open,High,Low
count,260.0,260.0,260.0,260.0
mean,89.2762,89.295269,89.624719,89.003942
std,1.249075,1.242751,1.227101,1.250687
min,86.408,86.4215,86.7005,86.2225
25%,88.321,88.360125,88.652875,87.977
50%,89.189,89.1945,89.525,88.958
75%,90.13025,90.18775,90.484125,89.90675
max,92.446,92.469,92.665,91.9705


### Data Preprocessing

In [6]:
# The 'Date' column is dropped because the data is not treated as a time series
# here; the aim is to see how 'Price' changes based on the other columns.
#
# The result is reassigned instead of using inplace=True, and errors='ignore'
# makes the drop a no-op when 'Date' is already gone, so this cell can be
# re-run without raising a KeyError.
df = df.drop(columns=['Date'], errors='ignore')
df

Unnamed: 0,Price,Open,High,Low,Vol.,Change %
0,91.869,91.9765,92.2030,91.8570,,-0.12%
1,91.977,92.4690,92.6650,91.9360,,-0.51%
2,92.446,91.8370,92.6360,91.7380,,0.68%
3,91.823,91.5610,91.8690,91.5565,,0.31%
4,91.539,91.6520,91.6770,91.4565,,-0.03%
...,...,...,...,...,...,...
255,87.568,86.9070,87.8640,86.7110,101.63K,0.81%
256,86.862,87.6600,87.9105,86.8285,87.73K,-0.83%
257,87.591,87.3155,88.0430,87.2535,96.93K,0.30%
258,87.327,88.2645,88.4380,87.2845,100.98K,-1.02%


In [9]:
# 'Vol.' is dropped as well: only 18 of the 260 records have a value, so it
# would add little to the 'Price' prediction.
#
# Reassigning with errors='ignore' (rather than inplace=True) keeps the cell
# idempotent: re-running it after 'Vol.' has been removed does not fail.
df = df.drop(columns=['Vol.'], errors='ignore')
df

Unnamed: 0,Price,Open,High,Low,Change %
0,91.869,91.9765,92.2030,91.8570,-0.12%
1,91.977,92.4690,92.6650,91.9360,-0.51%
2,92.446,91.8370,92.6360,91.7380,0.68%
3,91.823,91.5610,91.8690,91.5565,0.31%
4,91.539,91.6520,91.6770,91.4565,-0.03%
...,...,...,...,...,...
255,87.568,86.9070,87.8640,86.7110,0.81%
256,86.862,87.6600,87.9105,86.8285,-0.83%
257,87.591,87.3155,88.0430,87.2535,0.30%
258,87.327,88.2645,88.4380,87.2845,-1.02%


In [10]:
# Strip the trailing '%' from the 'Change %' column and convert it to float.
#
# The column is cast to str first so the cell is safe to re-run: once the
# column is already float, the .str accessor would otherwise raise
# AttributeError. regex=False treats '%' as a literal character.
df['Change %'] = (
    df['Change %']
    .astype(str)
    .str.replace('%', '', regex=False)
    .astype(float)
)
df

Unnamed: 0,Price,Open,High,Low,Change %
0,91.869,91.9765,92.2030,91.8570,-0.12
1,91.977,92.4690,92.6650,91.9360,-0.51
2,92.446,91.8370,92.6360,91.7380,0.68
3,91.823,91.5610,91.8690,91.5565,0.31
4,91.539,91.6520,91.6770,91.4565,-0.03
...,...,...,...,...,...
255,87.568,86.9070,87.8640,86.7110,0.81
256,86.862,87.6600,87.9105,86.8285,-0.83
257,87.591,87.3155,88.0430,87.2535,0.30
258,87.327,88.2645,88.4380,87.2845,-1.02


In [11]:
# Re-check the schema after preprocessing: every remaining column should now be numeric.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 260 entries, 0 to 259
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Price     260 non-null    float64
 1   Open      260 non-null    float64
 2   High      260 non-null    float64
 3   Low       260 non-null    float64
 4   Change %  260 non-null    float64
dtypes: float64(5)
memory usage: 10.3 KB


In [12]:
# Choose the predictor columns and the regression target.
# NOTE(review): 'High', 'Low' and 'Change %' look like same-day values that
# bound or are derived from 'Price' -- if so they leak the target into the
# features; confirm before relying on the reported scores.
features = ['Open', 'High', 'Low', 'Change %']

X, y = df[features], df['Price']

# Show the feature matrix followed by the target series.
display(X, y)

Unnamed: 0,Open,High,Low,Change %
0,91.9765,92.2030,91.8570,-0.12
1,92.4690,92.6650,91.9360,-0.51
2,91.8370,92.6360,91.7380,0.68
3,91.5610,91.8690,91.5565,0.31
4,91.6520,91.6770,91.4565,-0.03
...,...,...,...,...
255,86.9070,87.8640,86.7110,0.81
256,87.6600,87.9105,86.8285,-0.83
257,87.3155,88.0430,87.2535,0.30
258,88.2645,88.4380,87.2845,-1.02


0      91.869
1      91.977
2      92.446
3      91.823
4      91.539
        ...  
255    87.568
256    86.862
257    87.591
258    87.327
259    88.223
Name: Price, Length: 260, dtype: float64

### Splitting for Training & Testing

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Standardizing Features

In [14]:
# Fit the scaler on the training rows only, then apply that same fitted
# scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Model Training

In [15]:
# Train a random-forest regressor (seeded for reproducibility) on the scaled
# training features; the trailing expression displays the fitted estimator.
model = RandomForestRegressor(random_state=42).fit(X_train_scaled, y_train)
model

### Making Predictions

In [16]:
# Predict 'Price' for the held-out rows from their scaled features and show the result.
y_pred = model.predict(X_test_scaled)
y_pred

array([90.56891, 90.04425, 88.30092, 90.09442, 87.21374, 87.62311,
       89.81047, 88.66808, 90.72327, 90.31198, 91.09773, 86.97023,
       89.01132, 88.2896 , 89.15007, 87.62701, 88.87036, 91.53523,
       87.57339, 90.81658, 90.66788, 92.00732, 90.38005, 89.14809,
       90.32786, 87.81369, 88.56985, 87.96227, 88.56161, 89.65056,
       89.7944 , 89.12355, 88.62592, 89.6446 , 89.46888, 90.96574,
       87.12146, 90.98573, 90.44262, 90.35764, 89.56639, 88.27929,
       91.08088, 89.19753, 90.00782, 89.48132, 89.42366, 88.17613,
       90.6657 , 90.00919, 89.29435, 87.95674])

### Evaluating Model Performance

In [17]:
# Error metrics on the held-out set, reported to four decimal places.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(
    f"Mean Absolute Error: {mae:.4f}",
    f"Mean Squared Error: {mse:.4f}",
    sep="\n",
)

Mean Absolute Error: 0.1496
Mean Squared Error: 0.0477


### Feature Importance

In [21]:
# Pair each feature name with the importance the fitted forest assigned to it.
feature_importances = model.feature_importances_
feature_importance_dict = dict(zip(features, feature_importances))

# Report the features from most to least important.
print("Feature Importances:")
ranked_names = sorted(
    feature_importance_dict,
    key=feature_importance_dict.get,
    reverse=True,
)
for name in ranked_names:
    print(f"{name}: {feature_importance_dict[name]:.4f}")

Feature Importances:
High: 0.6117
Low: 0.3721
Change %: 0.0114
Open: 0.0048


### Checking Goodness of Fit (R²)

In [19]:
# Coefficient of determination (R^2) on the held-out set.
# NOTE(review): R^2 measures the share of variance explained by the model; it
# is not a classification-style accuracy, despite the label printed below.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R-squared (Accuracy) Score: {r2:.4f}")

R-squared (Accuracy) Score: 0.9724


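## On the held-out test set, the EUR/INR price model reaches R² = 0.9724, i.e. it explains about 97.24% of the variance in 'Price' (R² is a goodness-of-fit measure, not an accuracy).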

### Saving the Model & Scaler Object

In [31]:
# Persist both fitted objects: the model, and the scaler that new inputs must
# be passed through before they are given to the model.
joblib.dump(model, 'currency_prediction_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

### Making Predictions on a New Record

In [29]:
# Build the new record as a one-row DataFrame carrying the same column names
# the scaler was fitted with. Passing a bare ndarray makes scikit-learn warn
# that X has no valid feature names and relies purely on positional order.
new_input = pd.DataFrame(
    [[91.9765, 92.2030, 91.8570, -0.12]],
    columns=features,
)

# Apply the already-fitted scaler (transform only, never re-fit on new data).
new_input_scaled = scaler.transform(new_input)

predicted_price = model.predict(new_input_scaled)

print(predicted_price)

[91.90717]


