Add columns to a specific csv file

In [None]:
import pandas as pd
# import config as cfg

# Stem of the CSV (under ../outputs/) that receives the constant metadata columns.
filename = 'nsmallest_fee_0.2% - 01-07-2023'
csv_path = f'../outputs/{filename}.csv'

# Load the file and stamp every row with the same fee, ticker count and
# selection mode; existing columns with these names are overwritten.
df = pd.read_csv(csv_path).assign(**{
    'fee_%': 0.2,
    'num_tickers': 208,
    'nsmallest/nlargest': 'nsmallest',
})

# Write the result back over the original file (row index is not saved).
df.to_csv(csv_path, index=False)

In [None]:
import pandas as pd

# Stem of the CSV (under ../outputs/) that receives the constant columns.
filename = 'combined_profit_results'
csv_path = f'../outputs/{filename}.csv'

# Load the file and set the same constant value on every row of each column;
# existing columns with these names are overwritten.
# NOTE(review): 'parameters' is filled with the number 0.2 here, which looks
# like a fee value -- confirm whether 'fee_%' was the intended column.
df = pd.read_csv(csv_path).assign(**{
    'parameters': 0.2,
    'num_tickers': 208,
    'nsmallest/nlargest': 'nsmallest',
})

# Write the result back over the original file (row index is not saved).
df.to_csv(csv_path, index=False)

In [None]:
import pandas as pd

# Read the combined results into a DataFrame
df = pd.read_csv('../outputs/combined_profit_results.csv')

# Build one compact text label per row from the run settings, e.g.
# "w2, h4, n1, l1.0, f0.3, nt208, nsmallest".
parameters = (
    'w' + df['watch_days'].astype(str)
    + ', h' + df['hold_days'].astype(str)
    + ', n' + df['num_stocks_to_buy'].astype(str)
    + ', l' + df['loss_limit'].astype(str)
    + ', f' + df['fee_%'].astype(str)
    + ', nt' + df['num_tickers'].astype(str)
    + ', ' + df['nsmallest/nlargest'].astype(str)
)

# Place the label in the third position (index 2). The column is inserted
# directly instead of being assigned first and then re-listed via reindex:
# that approach names "parameters" twice and writes a duplicated column.
# A stale "parameters" column already present in the file is dropped first.
if 'parameters' in df.columns:
    df = df.drop(columns='parameters')
df.insert(2, 'parameters', parameters)

# Save to a separate file so the source CSV stays untouched
df.to_csv('../outputs/combined_profit_results2.csv', index=False)

Reorder columns in all csv files

In [None]:
import os
import pandas as pd

# Folder whose CSV files are rewritten in place
folder_path = '../outputs/'

# Columns to relocate, mapped to their target zero-based positions.
# NOTE(review): the positions are zero-based, so 5 is the 6th column, 6 the
# 7th and 7 the 8th -- confirm these are the intended slots.
column_positions = {
    'fee_%': 5,
    'num_tickers': 6,
    'nsmallest/nlargest': 7
}

# Iterate over all CSV files in the folder
for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue

    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path)

    # Skip files that cannot be reordered instead of aborting half-way
    # through the folder: either a listed column is absent, or the file has
    # too few columns for the requested positions to exist.
    missing = [name for name in column_positions if name not in df.columns]
    if missing:
        print(f'Skipping {filename}: missing columns {missing}')
        continue
    if len(df.columns) <= max(column_positions.values()):
        print(f'Skipping {filename}: only {len(df.columns)} columns')
        continue

    # Move each column: pop removes it, insert puts it back at the target
    # index. The moves run in dictionary order, so later insertions shift
    # whatever sits at or after their target position.
    for column_name, new_index in column_positions.items():
        df.insert(new_index, column_name, df.pop(column_name))

    # Save the updated DataFrame to the same CSV file, overwriting the original
    df.to_csv(file_path, index=False)

Move the "year" column to the second column position

In [None]:
import pandas as pd

# Read the combined results into a DataFrame
df = pd.read_csv('../outputs/combined_profit_results.csv')

# Move "year" to the second position (index 1): pop removes the column and
# returns it, insert puts it back at the target index. A missing "year"
# column raises KeyError from pop.
df.insert(1, "year", df.pop("year"))

# Save to a separate file so the source CSV stays untouched
df.to_csv('../outputs/combined_profit_results_2.csv', index=False)

Combine all csv files and drop duplicates

In [None]:
import os
import pandas as pd

# Folder containing the CSV files to combine
folder_path = '../outputs/'

# Read every CSV in the folder. The frames are collected in a list and
# concatenated once: DataFrame.append was removed in pandas 2.0, and growing
# a frame inside a loop copies all accumulated rows on every iteration.
# NOTE(review): the combined output is written into this same folder, so a
# re-run reads it (and any other derived CSV here) back in as input.
frames = [
    pd.read_csv(os.path.join(folder_path, filename))
    for filename in os.listdir(folder_path)
    if filename.endswith('.csv')
]
if not frames:
    raise FileNotFoundError(f'No CSV files found in {folder_path}')

# Stack the frames, drop exact duplicate rows, and sort by "yearly_profit"
# in descending order
combined_df = (
    pd.concat(frames, ignore_index=True)
    .drop_duplicates()
    .sort_values('yearly_profit', ascending=False)
)

# Number of unique rows after combining
print(len(combined_df))

# Save the combined DataFrame to a new CSV file
combined_df.to_csv(f'{folder_path}combined_profit_results.csv', index=False)

Pivot table

In [4]:
import pandas as pd

# Load the combined results
df = pd.read_csv('../outputs/combined_profit_results.csv')

# One pivot row per distinct combination of these columns.
group_keys = [
    'watch_days', 'hold_days', 'num_stocks_to_buy', 'loss_limit',
    'fee_%', 'num_tickers', 'nsmallest/nlargest',
]

# Aggregations for "yearly_profit": standard statistics plus two named
# counts of rows below and above 1.
profit_aggs = [
    'median', 'mean', 'min', 'max',
    ('yearly_profit < 1', lambda profits: (profits < 1).sum()),
    ('yearly_profit > 1', lambda profits: (profits > 1).sum()),
]

# Summarise per group (no per-year column split: columns='year' is not used),
# then order the groups by median yearly profit, highest first.
pivot_table = pd.pivot_table(
    df,
    index=group_keys,
    values=['yearly_profit', 'win_ratio'],
    aggfunc={
        'yearly_profit': profit_aggs,
        'win_ratio': 'median',
    },
).sort_values(by=[('yearly_profit', 'median')], ascending=False)

# Emit the table as markdown text
print(pivot_table.to_markdown())



|                                           |   ('win_ratio', 'median') |   ('yearly_profit', 'max') |   ('yearly_profit', 'mean') |   ('yearly_profit', 'median') |   ('yearly_profit', 'min') |   ('yearly_profit', 'yearly_profit < 1') |
|:------------------------------------------|--------------------------:|---------------------------:|----------------------------:|------------------------------:|---------------------------:|-----------------------------------------:|
| (2, 4, 1, 1.0, 0.3, 208, 'nsmallest')     |                     1     |                    1.41836 |                    1.32083  |                      1.34507  |                    1.11494 |                                        0 |
| (2, 2, 1, 0.999, 0.2, 208, 'nsmallest')   |                     0.87  |                    6.43372 |                    1.52769  |                      1.30591  |                    0.70537 |                                       13 |
| (4, 3, 1, 1.0, 0.3, 208, 'nsmallest')     |       

Add year column

In [None]:
import pandas as pd

# Load the combined results and derive "year" as the first four characters
# of each "date_range" value (the values stay strings in the DataFrame).
df = pd.read_csv('../outputs/combined_profit_results.csv').assign(
    year=lambda frame: frame['date_range'].str[:4]
)

# Show the first ten rows as a quick check
print(df.head(10))

# Write the result to a separate file; the source CSV is left unchanged
df.to_csv('../outputs/combined_profit_results_year.csv', index=False)