Reorder columns in all csv files

In [None]:
import os
import pandas as pd

# Folder whose CSV files are rewritten in place
folder_path = '../outputs/aa/'

# Columns to move, mapped to their target position.
# Positions are zero-based, so index 7 is the 8th column of the file.
column_positions = {
    # 'fee_%': 5,  # zero-based index 5 -> 6th column
    # 'num_tickers': 6,  # zero-based index 6 -> 7th column
    'nlargest_nsmallest': 7  # zero-based index 7 -> 8th column
}

# Visit every CSV file in the folder
for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue

    # Load the file into a DataFrame
    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path)

    # Check all requested columns before touching the frame, so that one file
    # with a different layout does not abort the whole batch after earlier
    # files have already been overwritten.
    missing_columns = [name for name in column_positions if name not in df.columns]
    if missing_columns:
        print(f'Skipping {filename}: missing column(s) {missing_columns}')
        continue

    # Move each configured column: take it out, then re-insert it at the
    # requested zero-based position (moves are applied in dict order).
    for column_name, new_index in column_positions.items():
        column = df.pop(column_name)
        df.insert(new_index, column_name, column)

    # Overwrite the original file with the reordered columns
    df.to_csv(file_path, index=False)

Combine all csv files and drop duplicates

In [None]:
import os
import pandas as pd

# Folder that holds the CSV files to combine
folder_path = '../outputs/aa/'

# Read every CSV file first and concatenate once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside the loop copies the accumulated frame on every iteration.
frames = []
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)
        frames.append(df)

# pd.concat rejects an empty list, so keep the original empty-frame starting
# point when the folder holds no CSV files.
combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Drop duplicate rows from the combined DataFrame
combined_df.drop_duplicates(inplace=True)

# Sort by the "yearly_profit" column, highest first
combined_df.sort_values('yearly_profit', ascending=False, inplace=True)

print(len(combined_df))

# NOTE(review): the result is written into the same folder that is scanned
# above, so a later run of this cell reads this file back in as well; rows that
# read back identically are removed again by drop_duplicates.
combined_df.to_csv(f'{folder_path}2023-07-11_combined_profit_results2.csv', index=False)

Pivot table

In [None]:
import pandas as pd

filename = 'results_2023-07-29_17h30m24s'

df = pd.read_csv(f'../outputs/{filename}.csv')

# Derive "year" from the first four characters of "date_range". Assigning it
# first creates the column (or overwrites one that is already in the file);
# popping and re-inserting then places it as the second column (index 1).
df['year'] = df['date_range'].str[:4]
df.insert(1, "year", df.pop("year"))

# Write the frame, now including "year", back over the file it was read from
df.to_csv(f'../outputs/{filename}.csv', index=False)

# pivot_table = pd.pivot_table(df,
#                              index=['watch_days', 'hold_days', 'num_stocks_to_buy', 'loss_limit', 'fee_%', 'num_tickers', 'nsmallest/nlargest'],
#                              columns=['year'],
#                              values=['yearly_profit', 'win_ratio'],
#                              aggfunc={
#                                 'yearly_profit': ['median', 'mean', 'min', 'max', ('count where < 1', lambda x: (x < 1).sum()), 'count'],
#                                 'win_ratio': [('win_median', lambda x: np.median(x))]
#                              },
#                              margins=True)

def get_overall_cross_tab(df):
    """Build one cross-tab of yearly profit and win ratio per parameter combination.

    Rows are the combinations of the parameter columns listed in
    ``group_columns``; columns are the values of ``df['year']`` plus the
    ``'All'`` margin added by ``margins=True``.

    Two cross-tabs are computed and joined side by side:

    * ``yearly_profit`` aggregated with median, mean, min, max and count,
      sorted descending by the median in the ``'All'`` column;
    * ``win_ratio`` aggregated with mean, sorted descending by the mean in
      the ``'All'`` column.

    Each cross-tab's column labels are replaced by ``(prefix, original_label)``
    pairs, i.e. the original label is kept whole as the second element.
    NOTE(review): the original labels look like (aggregation, year) tuples, so
    this nests a tuple inside the new label - confirm that is the intended
    header layout for the CSV written from the result.

    The combined table is also printed as Markdown (``DataFrame.to_markdown``
    relies on the optional ``tabulate`` package).

    Args:
        df: DataFrame containing the parameter columns, ``year``,
            ``yearly_profit`` and ``win_ratio``.

    Returns:
        The combined cross-tab DataFrame.
    """
    group_columns = [
        'start_watch_time', 'buy_time', 'sell_time', 'rank',
        'pct_change_threshold', 'watch_days', 'hold_days',
        'num_stocks_to_buy', 'loss_limit', 'fee', 'num_tickers',
    ]

    def get_cross_tab(df, values_column, agg_funcs, sorting_func):
        """Cross-tabulate one value column and sort rows by its 'All' margin."""
        row_keys = [df[name] for name in group_columns]
        table = pd.crosstab(
            index=row_keys,
            columns=df['year'],
            values=df[values_column],
            aggfunc=agg_funcs,
            margins=True,
        )
        return table.sort_values(by=[(sorting_func, 'All')], ascending=False)

    def with_prefix(table, prefix):
        """Relabel every column of ``table`` as ``(prefix, original_label)``."""
        relabelled = [(prefix, label) for label in table.columns]
        table.columns = pd.MultiIndex.from_tuples(relabelled)
        return table

    profit_table = with_prefix(
        get_cross_tab(df, 'yearly_profit', ['median', 'mean', 'min', 'max', 'count'], 'median'),
        'yearly_profit',
    )
    win_ratio_table = with_prefix(
        get_cross_tab(df, 'win_ratio', ['mean'], 'mean'),
        'win_ratio',
    )

    # Join the two tables column-wise; rows are aligned on the shared index
    overall_cross_tab = pd.concat([profit_table, win_ratio_table], axis=1)

    print(overall_cross_tab.to_markdown())

    return overall_cross_tab

# Build the combined cross-tab for the loaded results, then store it under
# ../outputs/ with a "pivot_table_" prefix on the source file name.
overall_cross_tab = get_overall_cross_tab(df)
pivot_output_path = f'../outputs/pivot_table_{filename}.csv'
overall_cross_tab.to_csv(pivot_output_path)


Convert single column CSV file to list

In [1]:
import csv

# CSV file whose first column is read into a list below (relative path)
file_path = '../db/tickers_total_daily_price_greater_than_e6.csv'

def csv_to_list(file_path):
    """Return the first field of every non-empty row of a CSV file.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        list[str]: The first-column values in file order. Every non-empty
        row is included; a header row, if present, is not treated specially.
    """
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for file objects passed to csv.reader.
    with open(file_path, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        # Blank lines are returned as empty rows; skip them instead of
        # failing with IndexError on row[0].
        column_list = [row[0] for row in reader if row]

    return column_list

# Read the first column of the file into a list and print the whole list
result_list = csv_to_list(file_path)
print(result_list)

['ACKB.BR', 'AED.BR', 'AGS.BR', 'ARGX.BR', 'AZE.BR', 'BEKB.BR', 'COFB.BR', 'COLR.BR', 'DIE.BR', 'ELI.BR', 'EURN.BR', 'FAGR.BR', 'GBLB.BR', 'KBCA.BR', 'MELE.BR', 'ONTEX.BR', 'PROX.BR', 'SHUR.BR', 'SOLB.BR', 'TNET.BR', 'UCB.BR', 'VGP.BR', 'WDP.BR', 'XIOR.BR', 'ALK-B.CO', 'ALMB.CO', 'AMBU-B.CO', 'MAERSK-B.CO', 'BAVA.CO', 'BIOPOR.CO', 'BOOZT-DKK.CO', 'AOJ-B.CO', 'CARL-B.CO', 'CBRAIN.CO', 'CHEMM.CO', 'COLO-B.CO', 'COLUM.CO', 'DNORD.CO', 'DANSKE.CO', 'DFDS.CO', 'FLS.CO', 'GMAB.CO', 'GN.CO', 'GREENH.CO', 'HLUN-A.CO', 'HLUN-B.CO', 'HH.CO', 'ISS.CO', 'JYSK.CO', 'MATAS.CO', 'NETC.CO', 'NKT.CO', 'NNIT.CO', 'NDA-DK.CO', 'NOVO-B.CO', 'ORSTED.CO', 'PNDORA.CO', 'RILBA.CO', 'ROCK-B.CO', 'RBREW.CO', 'RTX.CO', 'SHAPE.CO', 'SKJE.CO', 'SPNO.CO', 'SYDB.CO', 'TRMD-A.CO', 'TRYG.CO', 'VWS.CO', 'VJBA.CO', 'DEMANT.CO', 'ZEAL.CO', 'ANA.MC', 'ACX.MC', 'AENA.MC', 'AMP.MC', 'APPS.MC', 'AI.MC', 'A3M.MC', 'ADX.MC', 'BBVA.MC', 'SAN.MC', 'BKY.MC', 'BST.MC', 'CABK.MC', 'CLNX.MC', 'LOG.MC', 'CIE.MC', 'ANE.MC', 'EDR.MC', 

Convert pkl file to xlsx file

In [1]:
import pandas as pd

# Base name (without extension) and folder of the pickle file to convert
file_name = 'ohlcv_ntickers_1254_2000-08-01_to_2023-12-23'
folder_path = '../db/'

# NOTE: unpickling can execute arbitrary code; only load pickle files from a
# trusted source.
data = pd.read_pickle(f'{folder_path}{file_name}.pkl')

# Wrap the loaded object in a DataFrame so it can be exported
df = pd.DataFrame(data)

# Optional: reset MultiIndex columns to a single level
# df.columns = df.columns.droplevel(0)

# Write the frame as an .xlsx workbook next to the source file, formatting
# floats with five decimals. Alternative with an explicit writer engine:
# df.to_excel(f'{folder_path}{file_name}_numbers.xlsx', engine='openpyxl', float_format="%.5f")
df.to_excel(f'{folder_path}{file_name}_numbers.xlsx', float_format="%.5f")
