In [1]:
import os
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Find the price-change threshold that divides rows most evenly into rising / falling / stable
def best_fluc(dataframe, candidates=None):
    """Pick the threshold that splits price changes most evenly into three classes.

    For every candidate threshold ``x`` the rows of ``dataframe`` are counted as
    rising (``price_change > x``), falling (``price_change < -x``) or stable
    (everything else, which includes NaN values). The candidate whose three
    counts have the smallest mean absolute deviation from their common average
    is returned. On ties the candidate that was tried first is kept.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with a numeric ``price_change`` column. Any index is accepted.
    candidates : iterable of float, optional
        Thresholds to try, in the order given. Defaults to
        ``np.arange(0, 10, 0.5)``, i.e. 0, 0.5, ..., 9.5.

    Returns
    -------
    float
        The selected threshold, or ``0`` when ``candidates`` is empty.

    Raises
    ------
    KeyError
        If ``dataframe`` has no ``price_change`` column.
    """
    if candidates is None:
        candidates = np.arange(0, 10, 0.5)

    # Work on the raw values so the result does not depend on the frame's index labels.
    changes = dataframe['price_change'].to_numpy()
    total = len(changes)
    aver = total / 3

    best_threshold = 0
    # Start from infinity: a fixed sentinel such as 10000 is exceeded by every
    # candidate on large frames, which would leave the result stuck at 0.
    min_diff = float('inf')
    for x in candidates:
        rising_count = int((changes > x).sum())
        falling_count = int((changes < -x).sum())
        # Whatever is neither rising nor falling (NaN included) counts as stable.
        stable_count = total - rising_count - falling_count
        curr_diff = np.average([abs(rising_count - aver),
                                abs(falling_count - aver),
                                abs(stable_count - aver)])
        # Strict comparison keeps the first candidate among equally good ones.
        if curr_diff < min_diff:
            min_diff = curr_diff
            best_threshold = x
    return best_threshold

In [3]:
# Set the directory paths for input and output files
input_dir = 'CSV/'
output_dir = 'CSV_10/'

# Width of one averaging window, in the units of the first CSV column (10s here)
block_seconds = 10

# Names given positionally to the averaged columns; the file's own header row is discarded
averaged_columns = ['time', 'bid_weighted_average', 'bid_high', 'bid_volumn',
                    'ask_weighted_average', 'ask_low', 'ask_volumn',
                    'mid_price', 'spread', 'bid_ask_ratio']

# to_csv does not create missing directories, so make sure the target exists
os.makedirs(output_dir, exist_ok=True)

# Loop over each input file in the input directory (sorted so the printed report is reproducible)
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith('.csv'):
        continue

    # newline='' is the mode the csv module asks for when it is handed a file object
    with open(os.path.join(input_dir, filename), 'r', newline='') as file:
        reader = csv.reader(file)
        header = next(reader, None)  # skipped; None when the file is completely empty
        data = [tuple(map(float, row)) for row in reader if row]  # blank lines are ignored

    if not data:
        print(f"Skipping {filename}: no data rows")
        continue

    # compute the averages for each block of rows
    n = len(data)
    averages = []
    start = 0
    while start < n:
        # a block holds every following row whose time is within block_seconds of its first row
        end = start + 1
        while end < n and data[end][0] - data[start][0] <= block_seconds:
            end += 1
        block = data[start:end]
        # keep the first timestamp of the block; every other column is averaged and rounded
        average = (block[0][0],) + tuple(
            round(sum(row[col] for row in block) / len(block), 2)
            for col in range(1, len(block[0]))
        )
        averages.append(average)
        start = end

    # convert list to df
    df = pd.DataFrame(averages, columns=averaged_columns)

    # percentage change of mid_price relative to the previous block; computed as a float
    # column in one step instead of filling an integer column cell by cell
    previous_mid = df['mid_price'].shift(1)
    price_change = ((df['mid_price'] - previous_mid) / previous_mid) * 100
    price_change.iloc[0] = 0.0  # the first block has no predecessor
    df['price_change'] = price_change

    # Choose the x value that makes the three categories most evenly distributed
    # NOTE: the placeholder first row (change 0) still takes part in this choice, as before
    fluc_percentage = best_fluc(df)
    print(fluc_percentage)

    # label every row: Trend is 1 for rising, 2 for falling, 0 for stable, plus one-hot columns
    is_rising = df['price_change'] > fluc_percentage
    is_falling = df['price_change'] < -fluc_percentage
    df['Trend'] = is_rising.astype(int) + 2 * is_falling.astype(int)
    df['rising'] = is_rising.astype(int)
    df['falling'] = is_falling.astype(int)
    df['stable'] = (~(is_rising | is_falling)).astype(int)

    # drop the first row (it has no real price change) and the helper column
    df = df.drop(index=0).drop(columns='price_change')

    rising_count = df['rising'].sum()
    falling_count = df['falling'].sum()
    stable_count = df['stable'].sum()
    total_count = rising_count + falling_count + stable_count
    if total_count:
        rising_percentage = (rising_count / total_count) * 100
        falling_percentage = (falling_count / total_count) * 100
        stable_percentage = (stable_count / total_count) * 100
    else:
        # a file that collapses to a single block has no labelled rows left
        rising_percentage = falling_percentage = stable_percentage = float('nan')
    print(f"Percentage of rising: {rising_percentage:.2f}%")
    print(f"Percentage of falling: {falling_percentage:.2f}%")
    print(f"Percentage of stable: {stable_percentage:.2f}%")

    # save as a new csv
    df.to_csv(os.path.join(output_dir, filename), index=False)

1.0
Percentage of rising: 28.50%
Percentage of falling: 32.44%
Percentage of stable: 39.06%
1.0
Percentage of rising: 29.53%
Percentage of falling: 34.76%
Percentage of stable: 35.72%
0.5
Percentage of rising: 34.93%
Percentage of falling: 39.86%
Percentage of stable: 25.21%
0.5
Percentage of rising: 34.59%
Percentage of falling: 37.60%
Percentage of stable: 27.81%
2.5
Percentage of rising: 32.05%
Percentage of falling: 36.13%
Percentage of stable: 31.82%
2.0
Percentage of rising: 29.21%
Percentage of falling: 34.70%
Percentage of stable: 36.09%
2.0
Percentage of rising: 30.93%
Percentage of falling: 36.23%
Percentage of stable: 32.85%
1.5
Percentage of rising: 29.21%
Percentage of falling: 33.51%
Percentage of stable: 37.28%
4.0
Percentage of rising: 31.68%
Percentage of falling: 35.75%
Percentage of stable: 32.57%
4.5
Percentage of rising: 30.84%
Percentage of falling: 33.69%
Percentage of stable: 35.47%
2.0
Percentage of rising: 29.37%
Percentage of falling: 35.47%
Percentage of sta

9.5
Percentage of rising: 32.59%
Percentage of falling: 34.58%
Percentage of stable: 32.83%
9.5
Percentage of rising: 33.63%
Percentage of falling: 35.99%
Percentage of stable: 30.38%
9.5
Percentage of rising: 32.14%
Percentage of falling: 34.86%
Percentage of stable: 33.00%
9.5
Percentage of rising: 32.07%
Percentage of falling: 34.99%
Percentage of stable: 32.94%
9.5
Percentage of rising: 32.74%
Percentage of falling: 35.95%
Percentage of stable: 31.31%
9.5
Percentage of rising: 32.39%
Percentage of falling: 35.51%
Percentage of stable: 32.10%
9.5
Percentage of rising: 32.39%
Percentage of falling: 35.21%
Percentage of stable: 32.39%
9.5
Percentage of rising: 33.65%
Percentage of falling: 36.17%
Percentage of stable: 30.17%
9.5
Percentage of rising: 33.29%
Percentage of falling: 36.90%
Percentage of stable: 29.81%
9.5
Percentage of rising: 33.59%
Percentage of falling: 35.94%
Percentage of stable: 30.47%
9.5
Percentage of rising: 33.31%
Percentage of falling: 36.69%
Percentage of sta

# -----( •̀ ω •́ )y--- I am a  dividing line -----( •̀ ω •́ )y-----