In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

data = pd.read_csv(r"gold_price_yearly CLEANED.csv")

In [2]:
print(data.head())
print(data.dtypes)

   Year  Average Closing Price  Year Open  Year High  Year Low  \
0  1969                  41.10      41.80      43.75     35.00   
1  1970                  35.96      35.13      39.19     34.78   
2  1971                  40.80      37.33      43.90     37.33   
3  1972                  58.17      43.73      70.00     43.73   
4  1973                  97.12      64.99     127.00     64.10   

   Year Range Price  Year Close  Annual % Change  
0              8.75       35.21           -0.161  
1              4.41       37.38            0.062  
2              6.57       43.50            0.164  
3             26.27       64.70            0.487  
4             62.90      112.25            0.735  
Year                       int64
Average Closing Price    float64
Year Open                float64
Year High                float64
Year Low                 float64
Year Range Price         float64
Year Close               float64
Annual % Change          float64
dtype: object


In [4]:
data['HighChange'] = (data['Annual % Change'] > data['Annual % Change'].median()).astype(int)

features = ['Year Range Price', 'Year High', 'Year Low']
target = 'HighChange'

# Data cleaning to take out blanks
data_clean = data.dropna(subset=features + [target])

# Split the data into features (X) and target (y)
X = data_clean[features]
y = data_clean[target]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=200)

# Train a logistic regression model
clf = LogisticRegression().fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy and other metrics
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Recall:", recall)
print("Precision:", precision)

Accuracy: 0.5294117647058824
F1 Score: 0.42857142857142855
Recall: 0.2727272727272727
Precision: 1.0


In [5]:
# Find correlation between features and target
correlation = data_clean.corr()

print(correlation)

                           Year  Average Closing Price  Year Open  Year High  \
Year                   1.000000               0.846635   0.835866   0.831088   
Average Closing Price  0.846635               1.000000   0.988972   0.995816   
Year Open              0.835866               0.988972   1.000000   0.983010   
Year High              0.831088               0.995816   0.983010   1.000000   
Year Low               0.853361               0.996486   0.989314   0.986329   
Year Range Price       0.593399               0.795387   0.766858   0.845242   
Year Close             0.842499               0.991577   0.970174   0.989994   
Annual % Change       -0.166250              -0.103270  -0.179440  -0.070086   
HighChange            -0.058220               0.047168  -0.040478   0.078535   

                       Year Low  Year Range Price  Year Close  \
Year                   0.853361          0.593399    0.842499   
Average Closing Price  0.996486          0.795387    0.991577   
Year