# Eikon DS Loans Data Regrouping and Extraction

This Python script regroups multiple tab-delimited text files by year and saves each year's combined data to an Excel file. It then extracts the 'Borrower SDC Cusip' column from each yearly dataset, removes duplicate values, and saves the result to a separate Excel file per year.

In [1]:
import os
import pandas as pd

# Define a function to regroup files by year
def regroup_files_by_year(directory):
    """Load the "eikon DS loans*.txt" files in *directory*, grouped by year.

    Only files whose name starts with "eikon DS loans" and ends with ".txt"
    are read; each one is parsed as a tab-delimited table. The year key is
    '20' followed by characters 4-5 of the second '-'-separated part of the
    filename.

    Files are visited in sorted filename order so the row order of every
    yearly dataframe is reproducible (os.listdir returns entries in
    arbitrary order).

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A dict mapping each year string to one dataframe holding the rows
        of all files for that year. Row indices of the individual files are
        kept as read, so index values may repeat across files. The dict is
        empty when no file matches.

    Raises:
        IndexError: If a matching filename contains no '-'.
    """
    # Collect the per-file frames first and concatenate once per year;
    # calling pd.concat inside the loop re-copies the accumulated rows
    # for every additional file.
    frames_by_year = {}

    for filename in sorted(os.listdir(directory)):
        if not (filename.endswith(".txt") and filename.startswith("eikon DS loans")):
            continue

        # NOTE(review): assumes a two-digit year sits at offset 4 of the
        # text after the first '-' (e.g. "...-311222.txt" -> "2022") and
        # that all years are 20xx; confirm against the real file names.
        year = '20' + filename.split('-')[1][4:6]

        df = pd.read_csv(os.path.join(directory, filename), delimiter="\t")
        frames_by_year.setdefault(year, []).append(df)

    return {year: pd.concat(frames) for year, frames in frames_by_year.items()}

# Regroup the loan exports found in the source folder (an absolute path,
# not the current working directory)
source_dir = r"C:\Users\GODLEWSKI\OneDrive - unistra.fr\DATA\THOMSON REUTERS\NEW 2022 + 2024\Loans\eikon DS loans"
dataframes = regroup_files_by_year(source_dir)

# For every year, write two workbooks using relative file names:
# the full yearly data, and the distinct borrower CUSIP values
for year in dataframes:
    df = dataframes[year]

    # Full yearly dataset, without the dataframe index column
    yearly_workbook = f"eikon DS loans {year}.xlsx"
    df.to_excel(yearly_workbook, index=False)

    # The column label is looked up with its trailing space
    cusips = df['Borrower SDC Cusip ']
    cusips = cusips.drop_duplicates()

    # Distinct CUSIP values only
    cusips_workbook = f"eikon DS loans {year} cusips.xlsx"
    cusips.to_excel(cusips_workbook, index=False)

# Report completion
print("The files have been regrouped by year, the duplicates have been dropped, and the 'Borrower SDC Cusip' values have been extracted and saved as Excel files.")


The files have been regrouped by year, the duplicates have been dropped, and the 'Borrower SDC Cusip' values have been extracted and saved as Excel files.
