In [1]:
import pandas as pd

# Wczytanie pliku CSV
file_path = "jensen.csv"  # Upewnij się, że plik jest w tym samym folderze co skrypt
df = pd.read_csv(file_path, sep=";", index_col=0)

# Konwersja indeksu na format daty dla poprawnego sortowania
df.index = pd.to_datetime(df.index, format="%d.%m.%Y", errors='coerce')

# Usunięcie pustych wierszy (gdzie wszystkie wartości to NaN)
df = df.dropna(how="all")

# Sprawdzenie, czy DataFrame nie jest pusty
if df.empty:
    print("❌ Błąd: Plik CSV jest pusty lub format danych jest niepoprawny.")
else:
    # Lista na wyniki dla każdego miesiąca
    results_list = []
    
    # Zmienna przechowująca listę wybranych firm na 12 miesięcy
    current_top_firms = []
    current_bottom_firms = []

    for i, selected_month in enumerate(df.index.unique()):  # Iteracja po unikalnych miesiącach
        # Co 12 miesięcy aktualizujemy listę firm
        if i % 12 == 0:
            # Pobranie zwrotów dla wybranego miesiąca
            returns_selected_month = df.loc[selected_month]

            # Sortowanie zwrotów, aby znaleźć najlepsze i najgorsze
            sorted_returns = returns_selected_month.sort_values(ascending=False)

            # Pobranie tylko **20% najlepszych i 20% najgorszych** akcji
            num_stocks = len(sorted_returns)
            current_top_firms = sorted_returns.head(int(num_stocks * 0.2)).index.tolist()
            current_bottom_firms = sorted_returns.tail(int(num_stocks * 0.2)).index.tolist()

        # Zapisanie wyników do listy dla każdego miesiąca
        results_list.append(pd.DataFrame({
            "Date": [selected_month.strftime('%Y-%m')],
            "Top Performers": [", ".join(current_top_firms)],
            "Bottom Performers": [", ".join(current_bottom_firms)]
        }))

    # Połączenie wyników w jeden DataFrame
    results_df = pd.concat(results_list, ignore_index=True).set_index("Date")

    # Wyświetlenie tabeli
    print(results_df)

    # Zapisanie wyników do pliku CSV
    output_file = "monthly_top_bottom_20_percent_12_months_jensen.csv"
    results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\n✅ Wyniki zapisano do pliku: {output_file}")


                                            Top Performers  \
Date                                                         
2004-01  HOLX, TTWO, ROK, JBHT, DHI, MHK, POOL, AOS, CO...   
2004-02  HOLX, TTWO, ROK, JBHT, DHI, MHK, POOL, AOS, CO...   
2004-03  HOLX, TTWO, ROK, JBHT, DHI, MHK, POOL, AOS, CO...   
2004-04  HOLX, TTWO, ROK, JBHT, DHI, MHK, POOL, AOS, CO...   
2004-05  HOLX, TTWO, ROK, JBHT, DHI, MHK, POOL, AOS, CO...   
...                                                    ...   
2023-08  NUE, ADM, SJM, ROL, DHI, FDS, HOLX, POOL, BRO,...   
2023-09  NUE, ADM, SJM, ROL, DHI, FDS, HOLX, POOL, BRO,...   
2023-10  NUE, ADM, SJM, ROL, DHI, FDS, HOLX, POOL, BRO,...   
2023-11  NUE, ADM, SJM, ROL, DHI, FDS, HOLX, POOL, BRO,...   
2023-12  NUE, ADM, SJM, ROL, DHI, FDS, HOLX, POOL, BRO,...   

                                         Bottom Performers  
Date                                                        
2004-01  CINF, AFL, LNC, PNW, EMN, STT, BAX, NTRS, INCY...  
2004-02  C

In [2]:
import pandas as pd

# Load the precomputed list of Top and Bottom performers
file_path_top_bottom = "monthly_top_bottom_20_percent_12_months_jensen.csv"
df_top_bottom = pd.read_csv(file_path_top_bottom, sep=",", index_col=0)

# Convert index to datetime to match the format in ret.csv
df_top_bottom.index = pd.to_datetime(df_top_bottom.index, format="%Y-%m", errors='coerce')

# Load the returns dataset (ret.csv)
file_path_returns = "jensen.csv"
df_returns = pd.read_csv(file_path_returns, sep=";", index_col=0)

# Convert index to datetime (DD.MM.YYYY format)
df_returns.index = pd.to_datetime(df_returns.index, format="%d.%m.%Y", errors='coerce')

# Convert returns index to monthly format (YYYY-MM) to match df_top_bottom
df_returns.index = df_returns.index.to_period("M")

# 🟢 🔹 **Usunięcie % i konwersja na float** 🔹 🟢
df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))
df_returns = df_returns.apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values
df_returns = df_returns.dropna(how="all")

# Initialize list for results
avg_results_list = []

# Iterate through available months
for selected_month in df_top_bottom.index:
    period_month = selected_month.to_period("M")  # Convert to Period format

    if period_month in df_returns.index:
        # Retrieve the list of top and bottom performers
        top_firms = df_top_bottom.loc[selected_month, "Top Performers"]
        bottom_firms = df_top_bottom.loc[selected_month, "Bottom Performers"]

        # Ensure lists are properly formatted
        if isinstance(top_firms, str) and isinstance(bottom_firms, str):
            top_firms = [firm.strip() for firm in top_firms.split(",")]
            bottom_firms = [firm.strip() for firm in bottom_firms.split(",")]
        else:
            continue  # Skip iteration if data is not properly formatted

        # Get the returns for these firms from the returns dataset
        selected_returns_month = df_returns.loc[period_month]

        # Extract returns of top and bottom firms (only if they exist in returns dataset)
        valid_top_firms = [firm for firm in top_firms if firm in selected_returns_month]
        valid_bottom_firms = [firm for firm in bottom_firms if firm in selected_returns_month]

        if valid_top_firms and valid_bottom_firms:
            # 🟢 🔹 **Usunięcie błędnych wartości i konwersja na float** 🔹 🟢
            top_returns = selected_returns_month[valid_top_firms].apply(pd.to_numeric, errors='coerce').mean()
            bottom_returns = selected_returns_month[valid_bottom_firms].apply(pd.to_numeric, errors='coerce').mean()

            # Store the result
            avg_results_list.append(pd.DataFrame({
                "Date": [selected_month.strftime('%Y-%m')],
                "Avg Top Performers": [top_returns],
                "Avg Bottom Performers": [bottom_returns]
            }))

# Combine results into a single DataFrame
if avg_results_list:
    avg_results_df = pd.concat(avg_results_list, ignore_index=True).set_index("Date")
    # Save results to CSV
    output_file = "avg_monthly_top_bottom_20_percent_12_months_jensen.csv"
    avg_results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\n✅ Results saved to: {output_file}")
    print(avg_results_df)
else:
    print("\n❌ No matching data found. Check the formatting of the files.")


  df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))



✅ Results saved to: avg_monthly_top_bottom_20_percent_12_months_jensen.csv
         Avg Top Performers  Avg Bottom Performers
Date                                              
2004-01            0.039026              -0.002530
2004-02            0.036788               0.000981
2004-03            0.037057              -0.000614
2004-04            0.037860              -0.001736
2004-05            0.036358              -0.000952
...                     ...                    ...
2023-08            0.008311              -0.007495
2023-09            0.008768              -0.006940
2023-10            0.006458              -0.007693
2023-11            0.005244              -0.009099
2023-12            0.006494              -0.010372

[240 rows x 2 columns]


In [3]:
import pandas as pd

# Load the precomputed list of Top and Bottom performers
file_path_top_bottom = "monthly_top_bottom_20_percent_12_months_jensen.csv"
df_top_bottom = pd.read_csv(file_path_top_bottom, sep=",", index_col=0)

# Convert index to datetime to match the format in ret.csv
df_top_bottom.index = pd.to_datetime(df_top_bottom.index, format="%Y-%m", errors='coerce')

# Load the returns dataset (ret.csv)
file_path_returns = "ret.csv"
df_returns = pd.read_csv(file_path_returns, sep=";", index_col=0)

# Convert index to datetime (DD.MM.YYYY format)
df_returns.index = pd.to_datetime(df_returns.index, format="%d.%m.%Y", errors='coerce')

# Convert returns index to monthly format (YYYY-MM) to match df_top_bottom
df_returns.index = df_returns.index.to_period("M")

# 🟢 🔹 **Usunięcie % i konwersja na float** 🔹 🟢
df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))
df_returns = df_returns.apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values
df_returns = df_returns.dropna(how="all")

# Initialize list for results
avg_results_list = []

# Iterate through available months
for selected_month in df_top_bottom.index:
    period_month = selected_month.to_period("M")  # Convert to Period format

    if period_month in df_returns.index:
        # Retrieve the list of top and bottom performers
        top_firms = df_top_bottom.loc[selected_month, "Top Performers"]
        bottom_firms = df_top_bottom.loc[selected_month, "Bottom Performers"]

        # Ensure lists are properly formatted
        if isinstance(top_firms, str) and isinstance(bottom_firms, str):
            top_firms = [firm.strip() for firm in top_firms.split(",")]
            bottom_firms = [firm.strip() for firm in bottom_firms.split(",")]
        else:
            continue  # Skip iteration if data is not properly formatted

        # Get the returns for these firms from the returns dataset
        selected_returns_month = df_returns.loc[period_month]

        # Extract returns of top and bottom firms (only if they exist in returns dataset)
        valid_top_firms = [firm for firm in top_firms if firm in selected_returns_month]
        valid_bottom_firms = [firm for firm in bottom_firms if firm in selected_returns_month]

        if valid_top_firms and valid_bottom_firms:
            # 🟢 🔹 **Usunięcie błędnych wartości i konwersja na float** 🔹 🟢
            top_returns = selected_returns_month[valid_top_firms].apply(pd.to_numeric, errors='coerce').mean()
            bottom_returns = selected_returns_month[valid_bottom_firms].apply(pd.to_numeric, errors='coerce').mean()

            # Store the result
            avg_results_list.append(pd.DataFrame({
                "Date": [selected_month.strftime('%Y-%m')],
                "Avg Top Performers": [top_returns],
                "Avg Bottom Performers": [bottom_returns]
            }))

# Combine results into a single DataFrame
if avg_results_list:
    avg_results_df = pd.concat(avg_results_list, ignore_index=True).set_index("Date")
    # Save results to CSV
    output_file = "avg_monthly_top_bottom_20_percent_12_months_ret_jensen.csv"
    avg_results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\n✅ Results saved to: {output_file}")
    print(avg_results_df)
else:
    print("\n❌ No matching data found. Check the formatting of the files.")


  df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))



✅ Results saved to: avg_monthly_top_bottom_20_percent_12_months_ret_jensen.csv
         Avg Top Performers  Avg Bottom Performers
Date                                              
2004-01              -1.406                  5.344
2004-02               6.634                  3.291
2004-03               6.550                 -1.060
2004-04              -4.794                 -2.473
2004-05               1.756                  1.482
...                     ...                    ...
2023-08              -2.282                 -4.747
2023-09              -6.066                 -8.441
2023-10              -4.186                 -4.242
2023-11               8.737                  9.503
2023-12               6.407                 10.965

[240 rows x 2 columns]
