In [84]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

In [85]:
df = pd.read_csv('/workspaces/setproject/set1.csv')

In [86]:
df.dropna(inplace=True)

In [87]:
df['Date'] = pd.to_datetime(df['Date']).dt.month

In [88]:
features = ['Open', 'High','Low']  # Example features
target = 'Price'
df['Change'] = df['Price'].diff()  # Calculate price change
df['Up/Down'] = (df['Change'] > 0).astype(int)


In [89]:
X_train,X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.3, random_state=42)
model = DecisionTreeRegressor(max_depth=5, min_samples_split=2)

In [90]:
print(y_test)

812     1460.6
1197    1294.5
765     1563.6
322     1760.9
2475    1411.9
         ...  
24      1755.3
162     1844.0
1610    1253.0
335     1755.7
1232    1344.7
Name: Price, Length: 774, dtype: float64


In [91]:
print(model)

DecisionTreeRegressor(max_depth=5)


In [92]:
X_trainnew=X_train.dropna()
y_trainnew=y_train.dropna()

In [93]:
model.fit(X_trainnew, y_trainnew)

In [94]:
print(X_testnew)

      Date   Price
812   1970  1460.6
1197  1970  1294.5
765   1970  1563.6
322   1970  1760.9
2475  1970  1411.9
...    ...     ...
24    1970  1755.3
162   1970  1844.0
1610  1970  1253.0
335   1970  1755.7
1232  1970  1344.7

[774 rows x 2 columns]


In [95]:
X_testnew=X_test.dropna()

In [96]:
y_pred = model.predict(X_testnew)

In [97]:
print(y_pred)

[1477.03714286 1285.83141026 1559.72       1745.15454545 1401.8
 1671.71428571 1509.34418605 1285.83141026 1194.66015625 1162.66595745
 1305.27128713 1221.10194175 1671.71428571 1378.07575758 1243.64893617
 1873.59655172 1873.59655172 1305.27128713 1401.8        1243.64893617
 1776.55555556 1648.68181818 1243.64893617 1745.15454545 1243.64893617
 1509.34418605 1671.71428571 1671.71428571 1265.78130841 1305.27128713
 1559.72       1331.77058824 1265.78130841 1285.83141026 1944.16818182
 1559.72       1718.84313725 1243.64893617 1671.71428571 1243.64893617
 1745.15454545 1999.89       1899.4097561  1243.64893617 1194.66015625
 1243.64893617 1671.71428571 1162.66595745 1718.84313725 1477.03714286
 1745.15454545 1331.77058824 1194.66015625 1265.78130841 1331.77058824
 1824.26769231 1194.66015625 1559.72       1331.77058824 1162.66595745
 1285.83141026 1221.10194175 1671.71428571 1853.23823529 1285.83141026
 1559.72       1285.83141026 1718.84313725 1097.01290323 1509.34418605
 1595.9925925

In [98]:
from sklearn.metrics import r2_score, mean_squared_error

In [99]:
y_test_filtered = y_test.dropna()
y_pred_series = pd.Series(y_pred)
y_pred_filtered = y_pred_series.dropna()
print(y_pred_series)

0      1477.037143
1      1285.831410
2      1559.720000
3      1745.154545
4      1401.800000
          ...     
769    1776.555556
770    1824.267692
771    1265.781308
772    1745.154545
773    1331.770588
Length: 774, dtype: float64


In [100]:
print(y_test_filtered)

812     1460.6
1197    1294.5
765     1563.6
322     1760.9
2475    1411.9
         ...  
24      1755.3
162     1844.0
1610    1253.0
335     1755.7
1232    1344.7
Name: Price, Length: 774, dtype: float64


In [101]:
r2 = r2_score(y_test_filtered, y_pred_filtered)
mse = mean_squared_error(y_test_filtered, y_pred_filtered)

In [102]:
print(f"R-squared: {r2:.4f}")
print(f"Mean Squared Error: {mse:.2f}")

R-squared: 0.9979
Mean Squared Error: 144.80


In [103]:
importances = model.feature_importances_
feature_names = features

for feature, importance in zip(feature_names, importances):
    print(f"{feature}: {importance:.4f}")

Open: 0.0000
High: 0.0192
Low: 0.9808
