In [1]:
import pandas as pd

# Load the CSV file
file_path = "jensen.csv"  # Make sure the file is in the same directory as the script
df = pd.read_csv(file_path, sep=";", index_col=0)

# Convert index to datetime for proper sorting
df.index = pd.to_datetime(df.index, format="%d.%m.%Y", errors='coerce')

# Drop rows with NaN values in all columns
df = df.dropna(how="all")

# Check if DataFrame is empty
if df.empty:
    print("Błąd: Plik CSV jest pusty lub format danych jest niepoprawny.")
else:
    # Create a list to store monthly results
    results_list = []

    for selected_month in df.index.unique():  # Loop through unique months
        # Get returns for the selected month
        returns_selected_month = df.loc[selected_month]

        # Sort returns to find best and worst performers
        sorted_returns = returns_selected_month.sort_values(ascending=False)

        # Get top 50% (winners) and bottom 50% (losers)
        num_stocks = len(sorted_returns)
        top_half = sorted_returns.head(num_stocks // 2).index.tolist()
        bottom_half = sorted_returns.tail(num_stocks // 2).index.tolist()

        # Append results to list
        results_list.append(pd.DataFrame({
            "Date": [selected_month.strftime('%Y-%m')],
            "Top Performers": [", ".join(top_half)],
            "Bottom Performers": [", ".join(bottom_half)]
        }))

    # Combine results into a single DataFrame
    results_df = pd.concat(results_list, ignore_index=True).set_index("Date")

    # Display the table
    print(results_df)

    # Optionally, save the results to a CSV file
    results_df.to_csv("monthly_top_bottom_performers_jensen.csv", sep=",", encoding="utf-8")

    print("\nWyniki zapisano do pliku: monthly_top_bottom_performers_jensen.csv")


                                            Top Performers  \
Date                                                         
2004-01  HOLX, TTWO, ROK, JBHT, DHI, MHK, POOL, AOS, CO...   
2004-02  HOLX, TTWO, ROK, JBHT, DHI, MHK, COO, FRT, ROL...   
2004-03  HOLX, TTWO, ROK, JBHT, DHI, MHK, POOL, FRT, CO...   
2004-04  HOLX, ROK, TTWO, JBHT, DHI, MHK, FRT, POOL, BR...   
2004-05  HOLX, ROK, JBHT, TTWO, DHI, POOL, MHK, EIX, CO...   
...                                                    ...   
2023-08  NUE, ADM, AFL, SNA, VMC, DHI, SJM, FE, AVY, ED...   
2023-09  NUE, AFL, ADM, SNA, VMC, AVY, DHI, BRO, ED, CI...   
2023-10  NUE, AFL, SNA, ADM, BRO, VMC, CINF, GL, AVY, R...   
2023-11  NUE, AFL, SNA, ADM, BRO, CINF, DHI, GL, RHI, F...   
2023-12  NUE, AFL, DHI, SNA, ADM, BRO, FE, CINF, VMC, G...   

                                         Bottom Performers  
Date                                                        
2004-01  APD, FDS, ESS, FE, BXP, RHI, ED, DOV, GL, ADM,...  
2004-02  B

PermissionError: [Errno 13] Permission denied: 'monthly_top_bottom_performers_jensen.csv'

In [2]:
import pandas as pd

# Wczytanie pliku CSV
file_path = "jensen.csv"  
df = pd.read_csv(file_path, sep=";", index_col=0)

# Konwersja indeksu na format daty
df.index = pd.to_datetime(df.index, format="%d.%m.%Y", errors='coerce')

# Usunięcie pustych wartości
df = df.dropna(how="all")

# 🔹 Konwersja zwrotów: Usunięcie "%" i zamiana na float
df = df.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))
df = df.apply(pd.to_numeric, errors='coerce')

# Sprawdzenie, czy DataFrame nie jest pusty
if df.empty:
    print("Błąd: Plik CSV jest pusty lub format danych jest niepoprawny.")
else:
    # Lista do przechowywania wyników
    avg_results_list = []

    # Iteracja po unikalnych miesiącach
    for selected_month in df.index.unique():
        # Pobranie zwrotów dla danego miesiąca
        returns_selected_month = df.loc[selected_month]

        # Sortowanie zwrotów
        sorted_returns = returns_selected_month.sort_values(ascending=False)

        # Podział na najlepsze i najgorsze 50% firm
        num_stocks = len(sorted_returns)
        top_half = sorted_returns.head(num_stocks // 2)
        bottom_half = sorted_returns.tail(num_stocks // 2)

        # 🔹 Obliczenie średnich zwrotów
        avg_top = top_half.mean()
        avg_bottom = bottom_half.mean()

        # Dodanie wyników do listy
        avg_results_list.append(pd.DataFrame({
            "Date": [selected_month.strftime('%Y-%m')],
            "Avg Top Performers": [avg_top],
            "Avg Bottom Performers": [avg_bottom]
        }))

    # Połączenie wyników w jeden DataFrame
    avg_results_df = pd.concat(avg_results_list, ignore_index=True).set_index("Date")

    # Wyświetlenie tabeli
    print(avg_results_df)

    # Zapisanie wyników do pliku CSV
    output_file = "avg_top_bottom_performers_jensen.csv"
    avg_results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\nWyniki zapisano do pliku: {output_file}")


         Avg Top Performers  Avg Bottom Performers
Date                                              
2004-01            0.025389               0.002973
2004-02            0.025053               0.005592
2004-03            0.023986               0.004058
2004-04            0.025176               0.003000
2004-05            0.024418               0.003531
...                     ...                    ...
2023-08            0.007410              -0.004682
2023-09            0.007791              -0.004110
2023-10            0.006218              -0.005316
2023-11            0.005934              -0.007011
2023-12            0.006067              -0.006910

[240 rows x 2 columns]


  df = df.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))


PermissionError: [Errno 13] Permission denied: 'avg_top_bottom_performers_jensen.csv'

In [3]:
import pandas as pd

# Load the precomputed list of Top and Bottom performers
file_path_top_bottom = "monthly_top_bottom_performers_jensen.csv"
df_top_bottom = pd.read_csv(file_path_top_bottom, sep=",", index_col=0)

# Convert index to datetime to match the format in ret.csv
df_top_bottom.index = pd.to_datetime(df_top_bottom.index, format="%Y-%m", errors='coerce')

# Load the returns dataset (ret.csv)
file_path_returns = "ret.csv"
df_returns = pd.read_csv(file_path_returns, sep=";", index_col=0)

# Convert index to datetime (DD.MM.YYYY format)
df_returns.index = pd.to_datetime(df_returns.index, format="%d.%m.%Y", errors='coerce')

# Convert returns index to monthly format (YYYY-MM) to match df_top_bottom
df_returns.index = df_returns.index.to_period("M")

# 🟢 🔹 **Usunięcie % i konwersja na float** 🔹 🟢
df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))
df_returns = df_returns.apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values
df_returns = df_returns.dropna(how="all")

# Initialize list for results
avg_results_list = []

# Iterate through available months
for selected_month in df_top_bottom.index:
    period_month = selected_month.to_period("M")  # Convert to Period format

    if period_month in df_returns.index:
        # Retrieve the list of top and bottom performers
        top_firms = df_top_bottom.loc[selected_month, "Top Performers"]
        bottom_firms = df_top_bottom.loc[selected_month, "Bottom Performers"]

        # Ensure lists are properly formatted
        if isinstance(top_firms, str) and isinstance(bottom_firms, str):
            top_firms = [firm.strip() for firm in top_firms.split(",")]
            bottom_firms = [firm.strip() for firm in bottom_firms.split(",")]
        else:
            continue  # Skip iteration if data is not properly formatted

        # Get the returns for these firms from the returns dataset
        selected_returns_month = df_returns.loc[period_month]

        # Extract returns of top and bottom firms (only if they exist in returns dataset)
        valid_top_firms = [firm for firm in top_firms if firm in selected_returns_month]
        valid_bottom_firms = [firm for firm in bottom_firms if firm in selected_returns_month]

        if valid_top_firms and valid_bottom_firms:
            # 🟢 🔹 **Usunięcie błędnych wartości i konwersja na float** 🔹 🟢
            top_returns = selected_returns_month[valid_top_firms].apply(pd.to_numeric, errors='coerce').mean()
            bottom_returns = selected_returns_month[valid_bottom_firms].apply(pd.to_numeric, errors='coerce').mean()

            # Store the result
            avg_results_list.append(pd.DataFrame({
                "Date": [selected_month.strftime('%Y-%m')],
                "Avg Top Performers": [top_returns],
                "Avg Bottom Performers": [bottom_returns]
            }))

# Combine results into a single DataFrame
if avg_results_list:
    avg_results_df = pd.concat(avg_results_list, ignore_index=True).set_index("Date")
    # Save results to CSV
    output_file = "avg_top_bottom_performers_ret_jensen.csv"
    avg_results_df.to_csv(output_file, sep=",", encoding="utf-8")

    print(f"\n✅ Results saved to: {output_file}")
    print(avg_results_df)
else:
    print("\n❌ No matching data found. Check the formatting of the files.")


  df_returns = df_returns.replace("%", "", regex=True).astype(str).applymap(lambda x: x.replace(",", "."))


PermissionError: [Errno 13] Permission denied: 'avg_top_bottom_performers_ret_jensen.csv'