In [6]:
import pandas as pd

# Wczytanie pliku CSV
file_path = "ex_ret.csv"  # Upewnij się, że plik jest w tym samym folderze co skrypt
df = pd.read_csv(file_path, sep=";", index_col=0)

# Konwersja indeksu na format daty dla poprawnego sortowania
df.index = pd.to_datetime(df.index, format="%d.%m.%Y", errors='coerce')

# Usunięcie pustych wierszy (gdzie wszystkie wartości to NaN)
df = df.dropna(how="all")

# Sprawdzenie, czy DataFrame nie jest pusty
if df.empty:
    print("❌ Błąd: Plik CSV jest pusty lub format danych jest niepoprawny.")
else:
    # Lista na wyniki dla każdego miesiąca
    results_list = []
    
    # Zmienna przechowująca listę wybranych firm na 12 miesięcy
    current_top_firms = []
    current_bottom_firms = []

    for i, selected_month in enumerate(df.index.unique()):  # Iteracja po unikalnych miesiącach
        # Co 12 miesięcy aktualizujemy listę firm
        if i % 12 == 0:
            # Pobranie zwrotów dla wybranego miesiąca
            returns_selected_month = df.loc[selected_month]

            # Sortowanie zwrotów, aby znaleźć najlepsze i najgorsze
            sorted_returns = returns_selected_month.sort_values(ascending=False)

            # Pobranie tylko **20% najlepszych i 20% najgorszych** akcji
            num_stocks = len(sorted_returns)
            current_top_firms = sorted_returns.head(int(num_stocks * 0.5)).index.tolist()
            current_bottom_firms = sorted_returns.tail(int(num_stocks * 0.5)).index.tolist()

        # Zapisanie wyników do listy dla każdego miesiąca
        results_list.append(pd.DataFrame({
            "Date": [selected_month.strftime('%Y-%m')],
            "Top Performers": [", ".join(current_top_firms)],
            "Bottom Performers": [", ".join(current_bottom_firms)]
        }))

    # Połączenie wyników w jeden DataFrame
    results_df = pd.concat(results_list, ignore_index=True).set_index("Date")

    # Wyświetlenie tabeli
    print(results_df)

    # Zapisanie wyników do pliku CSV
    output_file = "monthly_top_bottom_50_percent_12_months.csv"
    results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\n✅ Wyniki zapisano do pliku: {output_file}")


                                            Top Performers  \
Date                                                         
2002-02  EMN, SJM, AVY, DHI, GL, IEX, SNA, ATO, PPG, HB...   
2002-03  EMN, SJM, AVY, DHI, GL, IEX, SNA, ATO, PPG, HB...   
2002-04  EMN, SJM, AVY, DHI, GL, IEX, SNA, ATO, PPG, HB...   
2002-05  EMN, SJM, AVY, DHI, GL, IEX, SNA, ATO, PPG, HB...   
2002-06  EMN, SJM, AVY, DHI, GL, IEX, SNA, ATO, PPG, HB...   
...                                                    ...   
2023-08  CINF, ROK, CHD, PPG, HBAN, ROP, ESS, GL, INCY,...   
2023-09  CINF, ROK, CHD, PPG, HBAN, ROP, ESS, GL, INCY,...   
2023-10  CINF, ROK, CHD, PPG, HBAN, ROP, ESS, GL, INCY,...   
2023-11  CINF, ROK, CHD, PPG, HBAN, ROP, ESS, GL, INCY,...   
2023-12  CINF, ROK, CHD, PPG, HBAN, ROP, ESS, GL, INCY,...   

                                         Bottom Performers  
Date                                                        
2002-02  CMI, MAA, DTE, COO, BRO, TTWO, NTRS, AOS, ROP,...  
2002-03  C

In [7]:
import pandas as pd

# Load the precomputed list of Top and Bottom performers
file_path_top_bottom = "monthly_top_bottom_50_percent_12_months.csv"
df_top_bottom = pd.read_csv(file_path_top_bottom, sep=",", index_col=0)

# Convert index to datetime to match the format in ret.csv
df_top_bottom.index = pd.to_datetime(df_top_bottom.index, format="%Y-%m", errors='coerce')

# Load the returns dataset (ret.csv)
file_path_returns = "ex_ret.csv"
df_returns = pd.read_csv(file_path_returns, sep=";", index_col=0)

# Convert index to datetime (DD.MM.YYYY format)
df_returns.index = pd.to_datetime(df_returns.index, format="%d.%m.%Y", errors='coerce')

# Convert returns index to monthly format (YYYY-MM) to match df_top_bottom
df_returns.index = df_returns.index.to_period("M")

# 🟢 🔹 **Usunięcie % i konwersja na float** 🔹 🟢
df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))
df_returns = df_returns.apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values
df_returns = df_returns.dropna(how="all")

# Initialize list for results
avg_results_list = []

# Iterate through available months
for selected_month in df_top_bottom.index:
    period_month = selected_month.to_period("M")  # Convert to Period format

    if period_month in df_returns.index:
        # Retrieve the list of top and bottom performers
        top_firms = df_top_bottom.loc[selected_month, "Top Performers"]
        bottom_firms = df_top_bottom.loc[selected_month, "Bottom Performers"]

        # Ensure lists are properly formatted
        if isinstance(top_firms, str) and isinstance(bottom_firms, str):
            top_firms = [firm.strip() for firm in top_firms.split(",")]
            bottom_firms = [firm.strip() for firm in bottom_firms.split(",")]
        else:
            continue  # Skip iteration if data is not properly formatted

        # Get the returns for these firms from the returns dataset
        selected_returns_month = df_returns.loc[period_month]

        # Extract returns of top and bottom firms (only if they exist in returns dataset)
        valid_top_firms = [firm for firm in top_firms if firm in selected_returns_month]
        valid_bottom_firms = [firm for firm in bottom_firms if firm in selected_returns_month]

        if valid_top_firms and valid_bottom_firms:
            # 🟢 🔹 **Usunięcie błędnych wartości i konwersja na float** 🔹 🟢
            top_returns = selected_returns_month[valid_top_firms].apply(pd.to_numeric, errors='coerce').mean()
            bottom_returns = selected_returns_month[valid_bottom_firms].apply(pd.to_numeric, errors='coerce').mean()

            # Store the result
            avg_results_list.append(pd.DataFrame({
                "Date": [selected_month.strftime('%Y-%m')],
                "Avg Top Performers": [top_returns],
                "Avg Bottom Performers": [bottom_returns]
            }))

# Combine results into a single DataFrame
if avg_results_list:
    avg_results_df = pd.concat(avg_results_list, ignore_index=True).set_index("Date")
    # Save results to CSV
    output_file = "avg_monthly_top_bottom_50_percent_12_months.csv"
    avg_results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\n✅ Results saved to: {output_file}")
    print(avg_results_df)
else:
    print("\n❌ No matching data found. Check the formatting of the files.")


  df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))



✅ Results saved to: avg_monthly_top_bottom_50_percent_12_months.csv
         Avg Top Performers  Avg Bottom Performers
Date                                              
2002-02              5.8984                -3.5812
2002-03              3.2584                 7.6728
2002-04             -0.4392                -2.8924
2002-05             -0.0784                -1.2944
2002-06             -2.7364                -7.0036
...                     ...                    ...
2023-08             -3.8780                -4.0060
2023-09             -6.0892                -6.9704
2023-10             -2.4472                -4.3328
2023-11              7.0432                 7.5284
2023-12              6.1896                 6.5060

[263 rows x 2 columns]


In [8]:
import pandas as pd

# Load the precomputed list of Top and Bottom performers
file_path_top_bottom = "monthly_top_bottom_50_percent_12_months.csv"
df_top_bottom = pd.read_csv(file_path_top_bottom, sep=",", index_col=0)

# Convert index to datetime to match the format in ret.csv
df_top_bottom.index = pd.to_datetime(df_top_bottom.index, format="%Y-%m", errors='coerce')

# Load the returns dataset (ret.csv)
file_path_returns = "ret.csv"
df_returns = pd.read_csv(file_path_returns, sep=";", index_col=0)

# Convert index to datetime (DD.MM.YYYY format)
df_returns.index = pd.to_datetime(df_returns.index, format="%d.%m.%Y", errors='coerce')

# Convert returns index to monthly format (YYYY-MM) to match df_top_bottom
df_returns.index = df_returns.index.to_period("M")

# 🟢 🔹 **Usunięcie % i konwersja na float** 🔹 🟢
df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))
df_returns = df_returns.apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values
df_returns = df_returns.dropna(how="all")

# Initialize list for results
avg_results_list = []

# Iterate through available months
for selected_month in df_top_bottom.index:
    period_month = selected_month.to_period("M")  # Convert to Period format

    if period_month in df_returns.index:
        # Retrieve the list of top and bottom performers
        top_firms = df_top_bottom.loc[selected_month, "Top Performers"]
        bottom_firms = df_top_bottom.loc[selected_month, "Bottom Performers"]

        # Ensure lists are properly formatted
        if isinstance(top_firms, str) and isinstance(bottom_firms, str):
            top_firms = [firm.strip() for firm in top_firms.split(",")]
            bottom_firms = [firm.strip() for firm in bottom_firms.split(",")]
        else:
            continue  # Skip iteration if data is not properly formatted

        # Get the returns for these firms from the returns dataset
        selected_returns_month = df_returns.loc[period_month]

        # Extract returns of top and bottom firms (only if they exist in returns dataset)
        valid_top_firms = [firm for firm in top_firms if firm in selected_returns_month]
        valid_bottom_firms = [firm for firm in bottom_firms if firm in selected_returns_month]

        if valid_top_firms and valid_bottom_firms:
            # 🟢 🔹 **Usunięcie błędnych wartości i konwersja na float** 🔹 🟢
            top_returns = selected_returns_month[valid_top_firms].apply(pd.to_numeric, errors='coerce').mean()
            bottom_returns = selected_returns_month[valid_bottom_firms].apply(pd.to_numeric, errors='coerce').mean()

            # Store the result
            avg_results_list.append(pd.DataFrame({
                "Date": [selected_month.strftime('%Y-%m')],
                "Avg Top Performers": [top_returns],
                "Avg Bottom Performers": [bottom_returns]
            }))

# Combine results into a single DataFrame
if avg_results_list:
    avg_results_df = pd.concat(avg_results_list, ignore_index=True).set_index("Date")
    # Save results to CSV
    output_file = "avg_monthly_top_bottom_50_percent_12_months_ret.csv"
    avg_results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\n✅ Results saved to: {output_file}")
    print(avg_results_df)
else:
    print("\n❌ No matching data found. Check the formatting of the files.")


  df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))



✅ Results saved to: avg_monthly_top_bottom_50_percent_12_months_ret.csv
         Avg Top Performers  Avg Bottom Performers
Date                                              
2002-02              6.0284                -3.4512
2002-03              3.3884                 7.8028
2002-04             -0.2892                -2.7424
2002-05              0.0616                -1.1544
2002-06             -2.6064                -6.8736
...                     ...                    ...
2023-08             -3.4280                -3.5560
2023-09             -5.6592                -6.5404
2023-10             -1.9772                -3.8628
2023-11              7.4832                 7.9684
2023-12              6.6196                 6.9360

[263 rows x 2 columns]
