In [None]:
import os
import glob
import csv
from datetime import datetime, timedelta

import random
import json

import math

In [None]:
# strptime/strftime pattern for the date column: day-month-year, e.g. "01-09-2023".
DATE_FORMAT = "%d-%m-%Y"
# Lower-case spellings that the validators accept as "missing value" markers.
NULL_FIELD_VALUES = ['nan', 'nat', 'null', 'none']

# Window length used when picking a random run of consecutive rows from a file.
consecutive_timestamps = 10

In [288]:
# Without recursive=True, "**" behaves like "*", so this only matches CSV files
# that sit exactly one directory below stock_price_data_files/.
# NOTE(review): csv_files is reassigned by a later cell.
csv_files = glob.glob('stock_price_data_files/**/*.csv')

In [426]:
# Group the CSV paths by their parent sub-directory of stock_price_data_files/,
# each group sorted by path.
csv_files = {}
for exchange_dir in os.scandir('stock_price_data_files/'):
    if not os.path.isdir(exchange_dir.path):
        continue
    csv_paths = [
        entry.path
        for entry in os.scandir(exchange_dir.path)
        if os.path.splitext(entry.name)[-1] == ".csv"
    ]
    csv_files[exchange_dir.name] = sorted(csv_paths)

# Flatten to a single list, keeping at most n_files paths per sub-directory.
# NOTE(review): n_files is not defined in any visible cell; it has to exist in
# the kernel before this cell runs.
csv_files = [
    csv_path
    for group_paths in csv_files.values()
    for csv_path in group_paths[:n_files]
]

In [434]:
file_path = './stock_price_data_files/LSE/FLTR.csv'

def read_csv(file_path, has_header=False):
    """Read a CSV file into a list of rows.

    Args:
        file_path: Path of the CSV file to read.
        has_header: When True, the first row is treated as a header and dropped.

    Returns:
        A list of rows; each row is the list of string fields produced by
        ``csv.reader``. An empty file yields an empty list.
    """
    # newline='' hands newline handling to the csv module, so line breaks
    # embedded in quoted fields are kept exactly as written in the file.
    with open(file_path, 'r', newline='') as f:
        data = list(csv.reader(f))

    return data[1:] if has_header else data

In [330]:
def to_datetime(date_str, date_format=DATE_FORMAT):
    """Parse ``date_str`` into a ``datetime`` according to ``date_format``.

    ``datetime.strptime`` raises ValueError when the text does not match.
    """
    parsed = datetime.strptime(date_str, date_format)
    return parsed
    
def is_valid_date(date_str, date_format=DATE_FORMAT):
    """Return True when ``date_str`` parses with ``date_format`` or is a null marker."""
    try:
        to_datetime(date_str, date_format)
    except ValueError:
        # Unparseable text is still accepted when it spells a null value
        # (compared case-insensitively against NULL_FIELD_VALUES).
        return date_str.lower() in NULL_FIELD_VALUES
    return True
    
def is_valid_value(value_str):
    """Return True when ``value_str`` converts to ``float`` or is a null marker."""
    try:
        float(value_str)
    except ValueError:
        # Non-numeric text is still accepted when it spells a null value
        # (compared case-insensitively against NULL_FIELD_VALUES).
        return value_str.lower() in NULL_FIELD_VALUES
    return True

def validate_data(data, filename):
    """Check that the rows read from a CSV file have the expected layout.

    Each row must have exactly three fields: ticker, date, value. The checks
    run in order and the first failure raises.

    Args:
        data: List of rows (lists of string fields).
        filename: Name used as a prefix in error messages.

    Returns:
        True when every check passes (an empty list passes all checks).

    Raises:
        Exception: If a row does not have exactly 3 columns, a date or value
            field is not valid, or more than one ticker is present.
    """
    # validate length of rows (done first so the column lookups below are safe)
    if not all(len(_d) == 3 for _d in data):
        raise Exception(f"{filename}: Invalid file format! Some rows do not contain exactly 3 columns.")

    # validate datetime format
    if not all(is_valid_date(_d[1]) for _d in data):
        raise Exception(f"{filename}: Invalid file data! Second column is not in the right datetime format: {DATE_FORMAT}")

    # validate values
    if not all(is_valid_value(_d[2]) for _d in data):
        raise Exception(f"{filename}: Invalid file data! Third column values cannot be converted to float.")

    # validate uniqueness of ticker (first column)
    if len({_d[0] for _d in data}) > 1:
        raise Exception(f"{filename}: Invalid file data! First column contains more than one ticker.")

    return True

In [335]:
def preprocess_data(data, drop_nulls=True, sort_by_date=True):
    """Convert validated string rows into typed rows.

    Args:
        data: List of ``[ticker, date_str, value_str]`` rows.
        drop_nulls: When True, rows containing a null marker in any field are
            removed before conversion.
        sort_by_date: When True, the result is sorted by the parsed date.

    Returns:
        A new list of ``[ticker, datetime, float]`` rows.

    Raises:
        ValueError: If a date or value field cannot be converted, which
            includes null markers in the date column when ``drop_nulls`` is
            False.
    """
    # drop null values; compare case-insensitively, like the validators do,
    # so markers such as "NaT" or "NULL" are removed as well
    if drop_nulls:
        data = [
            row for row in data
            if not any(str(field).lower() in NULL_FIELD_VALUES for field in row)
        ]

    # convert to float and datetime
    data = [[row[0], to_datetime(row[1]), float(row[2])] for row in data]

    # sort by date
    if sort_by_date:
        data = sorted(data, key=lambda row: row[1])

    return data

In [383]:
def read_validate_preprocess(csv_file):
    """Load one CSV file, validate its rows and return them preprocessed.

    Reads with ``read_csv``, checks with ``validate_data`` (which raises on
    invalid content) and converts with ``preprocess_data``.
    """
    rows = read_csv(csv_file)

    # An empty file is only reported; the remaining steps still run on it.
    if not rows:
        print(f'{csv_file} is empty.')

    validate_data(rows, csv_file)

    return preprocess_data(rows)

In [435]:
# Load the raw (all-string) rows of the sample file set in file_path.
data = read_csv(file_path)

In [452]:
def from_datetime(date, date_format=DATE_FORMAT):
    """Format a ``datetime`` as text using ``date_format``."""
    return datetime.strftime(date, date_format)


def preprocess_row(x):
    """Convert a raw ``[ticker, date_str, value_str]`` row to ``[ticker, datetime, float]``."""
    return [x[0], to_datetime(x[1]), float(x[2])]


def postprocess_row(x):
    """Convert a typed ``[ticker, datetime, value]`` row back to a row of strings."""
    return [x[0], from_datetime(x[1]), str(x[2])]

# Round trip: parse every raw row into typed values ...
x = list(map(preprocess_row, data))

# ... then format the typed rows back into strings.
x = list(map(postprocess_row, x))

x

[['FLTR', '01-09-2023', '16340.0'],
 ['FLTR', '02-09-2023', '16258.3'],
 ['FLTR', '03-09-2023', '16274.56'],
 ['FLTR', '04-09-2023', '16176.91'],
 ['FLTR', '05-09-2023', '16419.56'],
 ['FLTR', '06-09-2023', '16288.21'],
 ['FLTR', '07-09-2023', '16483.67'],
 ['FLTR', '08-09-2023', '16516.63'],
 ['FLTR', '09-09-2023', '16401.02'],
 ['FLTR', '10-09-2023', '16384.62'],
 ['FLTR', '11-09-2023', '16482.92'],
 ['FLTR', '12-09-2023', '16351.06'],
 ['FLTR', '13-09-2023', '16220.25'],
 ['FLTR', '14-09-2023', '16171.59'],
 ['FLTR', '15-09-2023', '16106.91'],
 ['FLTR', '16-09-2023', '16058.58'],
 ['FLTR', '17-09-2023', '16283.4'],
 ['FLTR', '18-09-2023', '16397.39'],
 ['FLTR', '19-09-2023', '16561.36'],
 ['FLTR', '20-09-2023', '16644.17'],
 ['FLTR', '21-09-2023', '16793.97'],
 ['FLTR', '22-09-2023', '16760.38'],
 ['FLTR', '23-09-2023', '16626.3'],
 ['FLTR', '24-09-2023', '16493.29'],
 ['FLTR', '25-09-2023', '16344.85'],
 ['FLTR', '26-09-2023', '16263.12'],
 ['FLTR', '27-09-2023', '16328.17'],
 ['FL

## API 1

In [430]:
# Build a mapping "<parent directory>_<file stem>" (e.g. "LSE_FLTR") -> a random
# window of consecutive_timestamps consecutive rows from that file.
response = dict()

for csv_file in csv_files:
    data = read_validate_preprocess(csv_file)

    # A file needs at least one full window of rows to sample from.
    if len(data) < consecutive_timestamps:
        raise ValueError(
            f"{csv_file}: expected at least {consecutive_timestamps} valid rows, found {len(data)}."
        )

    # random.randint is inclusive on both ends, so the last valid start index
    # is len(data) - consecutive_timestamps.
    random_timestamp = random.randint(0, len(data) - consecutive_timestamps)

    data = data[random_timestamp : random_timestamp + consecutive_timestamps]

    # Split on the platform separator after normalising, so the key is built
    # from the last two path components on every OS.
    path_parts = os.path.normpath(os.path.splitext(csv_file)[0]).split(os.sep)
    response["_".join(path_parts[-2:])] = data

if not len(response):
    raise Exception('No data found in the csv files.')

# Round-trip through JSON; default=str turns the datetime values into strings.
d = json.loads(json.dumps(response, default=str, indent=3))

In [433]:
# Inspect `data`; after the API 1 loop it holds the window sampled from the last file.
data

[['FLTR', datetime.datetime(2023, 10, 23, 0, 0), 16915.58],
 ['FLTR', datetime.datetime(2023, 10, 24, 0, 0), 17050.9],
 ['FLTR', datetime.datetime(2023, 10, 25, 0, 0), 17085.0],
 ['FLTR', datetime.datetime(2023, 10, 26, 0, 0), 17170.43],
 ['FLTR', datetime.datetime(2023, 10, 27, 0, 0), 17256.28],
 ['FLTR', datetime.datetime(2023, 10, 28, 0, 0), 17083.72],
 ['FLTR', datetime.datetime(2023, 10, 29, 0, 0), 17152.05],
 ['FLTR', datetime.datetime(2023, 10, 30, 0, 0), 17375.03],
 ['FLTR', datetime.datetime(2023, 10, 31, 0, 0), 17583.53],
 ['FLTR', datetime.datetime(2023, 11, 1, 0, 0), 17671.45]]

## API 2

In [None]:
# Sample a window of consecutive rows from the first file and append three
# predicted rows for the following three days.
data = read_validate_preprocess(csv_files[0])

# A file needs at least one full window of rows to sample from.
if len(data) < consecutive_timestamps:
    raise ValueError(
        f"{csv_files[0]}: expected at least {consecutive_timestamps} valid rows, found {len(data)}."
    )

# random.randint is inclusive on both ends, so the last valid start index is
# len(data) - consecutive_timestamps.
random_timestamp = random.randint(0, len(data) - consecutive_timestamps)

data = data[random_timestamp : random_timestamp + consecutive_timestamps]

last_date = data[-1][1]
ticker_str = data[0][0]

# n+1: the second-highest value in the window (needs at least two rows).
pred_n1 = float(sorted(data, key=lambda v: v[2], reverse=True)[1][2])
# n+2: half the difference between the last observed value and n+1.
# NOTE(review): this is a difference, not a price level, so it can land far
# from the sampled prices - confirm this is the intended rule.
pred_n2 = (float(data[-1][2]) - float(pred_n1)) / 2
# n+3: half the difference between n+1 and n+2.
pred_n3 = (pred_n1 - pred_n2) / 2

data = data + [
    [ticker_str, last_date + timedelta(days=1), round(pred_n1, 2)],
    [ticker_str, last_date + timedelta(days=2), round(pred_n2, 2)],
    [ticker_str, last_date + timedelta(days=3), round(pred_n3, 2)],
]

print(json.dumps(data, default=str, indent=3))

[
   [
      "TSLA",
      "2023-11-30 00:00:00",
      204.29
   ],
   [
      "TSLA",
      "2023-12-01 00:00:00",
      206.94
   ],
   [
      "TSLA",
      "2023-12-02 00:00:00",
      209.43
   ],
   [
      "TSLA",
      "2023-12-03 00:00:00",
      207.75
   ],
   [
      "TSLA",
      "2023-12-04 00:00:00",
      208.37
   ],
   [
      "TSLA",
      "2023-12-05 00:00:00",
      211.08
   ],
   [
      "TSLA",
      "2023-12-06 00:00:00",
      212.77
   ],
   [
      "TSLA",
      "2023-12-07 00:00:00",
      214.05
   ],
   [
      "TSLA",
      "2023-12-08 00:00:00",
      212.12
   ],
   [
      "TSLA",
      "2023-12-09 00:00:00",
      214.24
   ],
   [
      "TSLA",
      "2023-12-10 00:00:00",
      212.77
   ],
   [
      "TSLA",
      "2023-12-11 00:00:00",
      0.73
   ],
   [
      "TSLA",
      "2023-12-12 00:00:00",
      106.02
   ]
]


In [268]:
# Sample mean and sample standard deviation (n - 1 denominator) of the value column.
# NOTE(review): returned_data is not defined in any visible cell; it has to
# exist in the kernel before this cell runs.
returned_values = [float(row[2]) for row in returned_data]
mean_returned = sum(returned_values) / len(returned_data)

squared_deviations = [(value - mean_returned) ** 2 for value in returned_values]
std_returned = math.sqrt(sum(squared_deviations) / (len(returned_data) - 1))

In [326]:
# Extend returned_data with three rows whose values are drawn from a normal
# distribution with the mean/std computed from the existing rows.
# NOTE(review): this cell reassigns returned_data, so every re-run appends
# three more rows.
last_date = returned_data[-1][1]
ticker_str = returned_data[0][0]

# Each generated row gets its own date: 1, 2 and 3 days after the last row.
returned_data = returned_data + [
    [ticker_str, last_date + timedelta(days=day_offset), round(random.gauss(mean_returned, std_returned), 2)]
    for day_offset in range(1, 4)
]

print(json.dumps(returned_data, default=str, indent=3))

[
   [
      "FLTR",
      "2023-09-10 00:00:00",
      16384.62
   ],
   [
      "FLTR",
      "2023-09-11 00:00:00",
      16482.92
   ],
   [
      "FLTR",
      "2023-09-12 00:00:00",
      16351.06
   ],
   [
      "FLTR",
      "2023-09-13 00:00:00",
      16220.25
   ],
   [
      "FLTR",
      "2023-09-14 00:00:00",
      16171.59
   ],
   [
      "FLTR",
      "2023-09-15 00:00:00",
      16106.91
   ],
   [
      "FLTR",
      "2023-09-16 00:00:00",
      16058.58
   ],
   [
      "FLTR",
      "2023-09-17 00:00:00",
      16283.4
   ],
   [
      "FLTR",
      "2023-09-18 00:00:00",
      16397.39
   ],
   [
      "FLTR",
      "2023-09-19 00:00:00",
      16561.36
   ],
   [
      "FLTR",
      "2023-09-20 00:00:00",
      16397.39
   ],
   [
      "FLTR",
      "2023-09-21 00:00:00",
      81.98500000000058
   ],
   [
      "FLTR",
      "2023-09-22 00:00:00",
      8157.702499999999
   ],
   [
      "FLTR",
      "2023-09-23 00:00:00",
      16542.46
   ],
   [
      "FLT