In [16]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt


# Working directories, both relative to the current working directory.
DATA_RUNTIME = Path("data/runtime")
ML_DIR = Path("ml")

# Make sure both directories exist: missing parents are created, and a
# directory that is already there is left untouched.
DATA_RUNTIME.mkdir(parents=True, exist_ok=True)
ML_DIR.mkdir(parents=True, exist_ok=True)

# Let pandas render up to 100 columns and use a 120-character display width.
for option_name, option_value in (("display.max_columns", 100), ("display.width", 120)):
    pd.set_option(option_name, option_value)

In [None]:
# Load every daily CSV found in DATA_RUNTIME and stack them into one frame.
# Sorting the paths makes the row order of the combined frame reproducible.
files = sorted(DATA_RUNTIME.glob("*.csv"))
print(f"Found {len(files)} daily files")

# pd.concat raises an opaque "No objects to concatenate" ValueError when it is
# given nothing; raise the same exception type with a message that names the
# directory so an empty or misplaced data folder is obvious.
if not files:
    raise ValueError(f"No daily CSV files found in {DATA_RUNTIME}")

# Read each file once and concatenate in a single call. ignore_index=True
# discards the per-file row numbers in favour of one continuous index.
full_df = pd.concat((pd.read_csv(csv_path) for csv_path in files), ignore_index=True)
full_df.head()

Found 790 daily files


Unnamed: 0,symbol,regularMarketPrice,regularMarketChangePercent,regularMarketVolume,averageDailyVolume3Month,marketCap,VolumeSpike,MomentumScore,VolumeScore,VolatilityScore,TrendScore,HotScore_today,HotScore_avg,RuntimeScore
0,URBN,77.07,12.823895,8497781.0,2098090.0,6913018000.0,4.050246,0.986979,0.994792,0.921875,0.804688,0.958464,0.958464,0.979232
1,SYM,87.51,13.222928,5076274.0,2168859.0,51875260000.0,2.340527,0.989583,0.973958,0.940104,0.820312,0.957292,0.957292,0.978646
2,ARWR,58.675,25.400724,5782060.0,2311350.0,8112262000.0,2.501594,1.0,0.976562,0.955729,0.734375,0.95638,0.95638,0.97819
3,DUOL,188.24,7.100598,2265762.0,1926600.0,8701837000.0,1.176042,0.919271,0.880208,0.960938,0.901042,0.912109,0.912109,0.956055
4,VERA,33.48,13.761466,2616403.0,1539957.0,2140312000.0,1.69901,0.994792,0.942708,0.807292,0.697917,0.909375,0.909375,0.954688


In [18]:
# Print a schema summary of the combined frame: number of rows, and the
# non-null count and dtype of every column.
full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7853 entries, 0 to 7852
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   symbol                      7853 non-null   object 
 1   regularMarketPrice          7853 non-null   float64
 2   regularMarketChangePercent  7853 non-null   float64
 3   regularMarketVolume         7853 non-null   float64
 4   averageDailyVolume3Month    7853 non-null   float64
 5   marketCap                   7853 non-null   float64
 6   VolumeSpike                 7853 non-null   float64
 7   MomentumScore               7853 non-null   float64
 8   VolumeScore                 7853 non-null   float64
 9   VolatilityScore             7853 non-null   float64
 10  TrendScore                  7853 non-null   float64
 11  HotScore_today              7853 non-null   float64
 12  HotScore_avg                7853 non-null   float64
 13  RuntimeScore                7853 

In [None]:
# Keep exactly one row per symbol: the observation with the highest RuntimeScore.
#
# GroupBy.first() returns the first *non-null value of each column*
# independently, so as soon as any cell is missing it can stitch one output
# row together from several different days. drop_duplicates(keep="first")
# keeps the whole top-ranked row instead.
full_df = (
    # If this cell is re-run, "symbol" is already the index; move it back to a
    # column so the steps below still apply.
    (full_df.reset_index() if "symbol" in full_df.index.names else full_df)
    # groupby ignores rows whose key is missing; mirror that here.
    .dropna(subset=["symbol"])
    # Stable sort: rows tied on RuntimeScore keep their load order, which makes
    # the chosen row deterministic.
    .sort_values("RuntimeScore", ascending=False, kind="stable")
    .drop_duplicates(subset="symbol", keep="first")
    .set_index("symbol")
    # groupby returned the groups ordered by symbol; keep that ordering.
    .sort_index()
)

Unnamed: 0_level_0,regularMarketPrice,regularMarketChangePercent,regularMarketVolume,averageDailyVolume3Month,marketCap,VolumeSpike,MomentumScore,VolumeScore,VolatilityScore,TrendScore,HotScore_today,HotScore_avg,RuntimeScore
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AAOI,39.0784,24.771390,8944166.0,5251838.0,2.668269e+09,1.703054,1.000000,0.955490,0.949555,0.578635,0.932196,0.821555,1.033434
AAON,78.6200,3.857340,1340078.0,983708.0,6.418070e+09,1.362272,0.918367,0.954082,0.959184,0.984694,0.945663,0.851665,1.028016
AAP,41.1200,4.154000,1062329.0,1715247.0,2.468115e+09,0.619345,0.900763,0.793893,0.854962,0.908397,0.854962,0.811462,0.954284
AAUC,21.1500,10.098910,1164298.0,350561.0,2.622600e+09,3.321242,0.970339,0.991525,0.805085,0.932203,0.940890,0.864736,1.014478
ABM,48.3500,5.706159,786991.0,514550.0,3.010175e+09,1.529474,0.964103,0.964103,0.810256,0.912821,0.928205,0.851536,1.009121
...,...,...,...,...,...,...,...,...,...,...,...,...,...
BMNR,31.1900,14.880300,57990880.0,44666803.0,1.328201e+10,1.298299,0.988235,0.896471,0.715294,0.143529,0.817059,0.721891,0.974445
BMRN,62.1201,19.526000,6840432.0,2623793.0,1.193416e+10,2.607078,0.990431,0.971292,0.928230,0.708134,0.943062,0.936158,0.975219
BNTX,118.0000,11.784770,2097341.0,1099086.0,2.960983e+10,1.908259,0.989059,0.870897,0.958425,0.846827,0.927352,0.814195,1.033167
BOOT,208.3200,4.825649,400238.0,528780.0,6.365348e+09,0.756908,0.847775,0.555035,0.936768,0.939110,0.772248,0.570831,1.062549


In [None]:
# Write the current full_df to runtimes.csv inside ML_DIR and report where
# it was saved.
base_file = ML_DIR.joinpath("runtimes.csv")
full_df.to_csv(path_or_buf=base_file)
print("Saved ML base dataset:", base_file)

Saved ML base dataset: ml\runtimes.csv
