In [91]:
# Import yfinance, pandas, numpa and matplotlib

import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler


In [92]:
pd.set_option('display.max_columns', None)
pd.set_option('max_rows', None)


In [93]:
# Read the applicants_data.csv file from the Resources folder into a Pandas DataFrame
new_model = pd.read_csv(
    Path("dataset.csv"),
    index_col = '2021 - 2023 AAPL Daily Data (Index)',
    parse_dates=True,
)

# Review the DataFrame
new_model.head()

Unnamed: 0_level_0,EBIDTA,EV/EBITDA,EPS,P/E,1 YR,5 YR,30 YR,50 MA,200 MA,News,News Score,Percent Change,Target - B/H/S (based on close - daily % change)
2021 - 2023 AAPL Daily Data (Index),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-06-30,31260000000,100.250802,1.53,126.777779,0.08,0.85,2.04,176.6054,154.7733,POSITIVE,0.748121,0.023103,Buy
2023-06-29,31260000000,98.047712,1.53,123.91503,0.08,0.8,1.98,176.0786,154.57575,POSITIVE,0.748121,0.001797,Buy
2023-06-28,31260000000,97.876698,1.53,123.69281,0.07,0.78,1.92,175.6162,154.4076,POSITIVE,0.999122,0.006328,Buy
2023-06-27,31260000000,97.278141,1.53,122.915031,0.08,0.79,1.93,175.1358,154.234,POSITIVE,0.977651,0.015059,Buy
2023-06-26,31260000000,95.874808,1.53,121.091506,0.07,0.7,1.81,174.6788,154.07275,POSITIVE,0.691947,-0.007553,Sell


In [94]:
#Change news sentiment to numeric values
new_model['News'] = new_model['News'].replace({'POSITIVE': 1, 'NEGATIVE': -1, 'NEUTRAL': 0})

In [95]:
new_model.head()

Unnamed: 0_level_0,EBIDTA,EV/EBITDA,EPS,P/E,1 YR,5 YR,30 YR,50 MA,200 MA,News,News Score,Percent Change,Target - B/H/S (based on close - daily % change)
2021 - 2023 AAPL Daily Data (Index),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-06-30,31260000000,100.250802,1.53,126.777779,0.08,0.85,2.04,176.6054,154.7733,1,0.748121,0.023103,Buy
2023-06-29,31260000000,98.047712,1.53,123.91503,0.08,0.8,1.98,176.0786,154.57575,1,0.748121,0.001797,Buy
2023-06-28,31260000000,97.876698,1.53,123.69281,0.07,0.78,1.92,175.6162,154.4076,1,0.999122,0.006328,Buy
2023-06-27,31260000000,97.278141,1.53,122.915031,0.08,0.79,1.93,175.1358,154.234,1,0.977651,0.015059,Buy
2023-06-26,31260000000,95.874808,1.53,121.091506,0.07,0.7,1.81,174.6788,154.07275,1,0.691947,-0.007553,Sell


In [96]:
#Change 'Target - B/H/S (based on close - daily % change)' to numeric values
new_model['Target - B/H/S (based on close - daily % change)'] = new_model['Target - B/H/S (based on close - daily % change)'].replace({'Buy': 1, 'Hold': 0, 'Sell': -1})

In [97]:
#delete columns not needed

del new_model["EPS"]
del new_model["30 YR"]
del new_model["200 MA"]

new_model.head()

Unnamed: 0_level_0,EBIDTA,EV/EBITDA,P/E,1 YR,5 YR,50 MA,News,News Score,Percent Change,Target - B/H/S (based on close - daily % change)
2021 - 2023 AAPL Daily Data (Index),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-30,31260000000,100.250802,126.777779,0.08,0.85,176.6054,1,0.748121,0.023103,1
2023-06-29,31260000000,98.047712,123.91503,0.08,0.8,176.0786,1,0.748121,0.001797,1
2023-06-28,31260000000,97.876698,123.69281,0.07,0.78,175.6162,1,0.999122,0.006328,1
2023-06-27,31260000000,97.278141,122.915031,0.08,0.79,175.1358,1,0.977651,0.015059,1
2023-06-26,31260000000,95.874808,121.091506,0.07,0.7,174.6788,1,0.691947,-0.007553,-1


In [98]:
#Drop NA values
new_model.dropna(inplace=True)
new_model.head()

Unnamed: 0_level_0,EBIDTA,EV/EBITDA,P/E,1 YR,5 YR,50 MA,News,News Score,Percent Change,Target - B/H/S (based on close - daily % change)
2021 - 2023 AAPL Daily Data (Index),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-06-30,31260000000,100.250802,126.777779,0.08,0.85,176.6054,1,0.748121,0.023103,1
2023-06-29,31260000000,98.047712,123.91503,0.08,0.8,176.0786,1,0.748121,0.001797,1
2023-06-28,31260000000,97.876698,123.69281,0.07,0.78,175.6162,1,0.999122,0.006328,1
2023-06-27,31260000000,97.278141,122.915031,0.08,0.79,175.1358,1,0.977651,0.015059,1
2023-06-26,31260000000,95.874808,121.091506,0.07,0.7,174.6788,1,0.691947,-0.007553,-1


In [99]:
X = new_model.iloc[:, :-1]  # Features (all columns except the last one)
y = new_model.iloc[:, -1]   # Target variable (last column)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Creating and training the RandomForestClassifier
new_model = RandomForestClassifier(n_estimators=100, min_samples_split=100, random_state=1)
new_model.fit(X_train, y_train)



RandomForestClassifier(min_samples_split=100, random_state=1)

In [100]:
# Evaluating the model on the test set
accuracy = new_model.score(X_test, y_test)

In [101]:
accuracy

1.0

In [102]:
# Set the random seed for reproducibility
np.random.seed(1)

# Define the number of data points you want to generate
num_data_points = 100

# Generate synthetic data for each feature

feature2 = np.random.uniform(low=0, high=100000000, size=num_data_points)  # EBIDTA
feature3 = np.random.uniform(low=0, high=200, size=num_data_points)  # EV/EBITDA
feature4 = np.random.uniform(low=0, high=200, size=num_data_points)  # P/E
feature5 = np.random.uniform(low=0, high=1, size=num_data_points)  # 1 YR
feature6 = np.random.uniform(low=0, high=1, size=num_data_points)  # 5 YR
feature7 = np.random.uniform(low=0, high=500, size=num_data_points)  # 50 MA
feature8 = np.random.choice([-1, 0, 1], size=num_data_points)  # News (Assuming categorical values -1, 0, 1)
feature9 = np.random.uniform(low=0, high=1, size=num_data_points)  # News Score
feature10 = np.random.uniform(low=-0.1, high=0.1, size=num_data_points)  # Percent Change

# Create the new data array by combining all the features
new_data = np.column_stack((feature2, feature3, feature4, feature5, feature6, feature7, feature8, feature9, feature10))

# Print the new data array
print(new_data)

[[ 4.17022005e+07  6.53289804e+01  1.90035224e+02  8.11858698e-01
   9.59434321e-01  4.37411048e+01 -1.00000000e+00  2.61264211e-01
  -9.94906920e-02]
 [ 7.20324493e+07  1.05411620e+02  1.11330638e+02  8.74961645e-01
   8.03960891e-01  1.13654868e+02 -1.00000000e+00  9.82055582e-01
  -9.71992887e-02]
 [ 1.14374817e+04  1.77188420e+02  1.83121270e+02  6.88413252e-01
   3.23230666e-02  1.57188308e+02  0.00000000e+00  9.28057640e-01
  -5.64777795e-02]
 [ 3.02332573e+07  7.14539520e+01  1.28313242e+02  5.69494413e-01
   7.09387251e-01  8.73829382e+01 -1.00000000e+00  3.35672390e-01
   8.09691559e-02]
 [ 1.46755891e+07  1.81707030e+02  7.80015428e+01  1.60971437e-01
   4.65001482e-01  3.03547081e+02  1.00000000e+00  7.04562198e-01
  -8.73457048e-02]
 [ 9.23385948e+06  1.24672023e+02  9.71981334e+01  4.66880023e-01
   9.47548941e-01  2.06793208e+02  0.00000000e+00  6.49134463e-01
  -4.15401810e-02]
 [ 1.86260211e+07  3.16424857e+00  1.20862097e+02  3.45172051e-01
   2.21432734e-01  4.0817575

In [103]:
from sklearn.metrics import precision_score
# Make predictions on new_data using the trained model
predictions = new_model.predict(X_test)

# Print the predictions
print(predictions)

[-1 -1  1 -1  1 -1 -1  1  1  1  1  1 -1  1 -1 -1  1  1  1 -1  1 -1  1  1
  1  1  1 -1  1 -1 -1 -1  1 -1 -1  1  1 -1  1 -1  1  1  1  1 -1 -1  1 -1
 -1  1  1  1 -1 -1 -1  1 -1 -1  1 -1 -1 -1  1 -1 -1 -1 -1  1 -1  1  1 -1
 -1 -1  1  1 -1  1 -1 -1 -1  1  1  1  1 -1  1 -1  1  1  1  1  1  1 -1  1
 -1  1  1]


In [104]:
# Calculate the precision score
precision = precision_score(y_test, predictions)

# Print the precision score
print("Precision Score:", precision)

Precision Score: 1.0
