In [1]:
import pandas as pd


def split_csv_to_dfs(filename):
  """
  This function reads a CSV file with specific markers and splits it into multiple DataFrames.

  Args:
      filename: The path to the CSV file.

  Returns:
      A dictionary where keys are section names and values are DataFrames.
  """
  dfs = {}
  current_section = None
  data = []
  with open(filename, 'r') as f:
    for line in f:
      line = line.strip()
      if not line:
        continue
      if line.startswith(','):  # Check for data lines
        data.append([x.strip() for x in line.split(',')])
      else:
        if current_section:
          dfs[current_section] = pd.DataFrame(data)
          data = []
        current_section = line.split()[0].strip()  # Extract section name
  if data:
    dfs[current_section] = pd.DataFrame(data)
  return dfs


# Read the CSV file
dfs = split_csv_to_dfs("TCS.csv")

# Print the section names and DataFrames
for section_name, df in dfs.items():
  print(f"Section: {section_name}")
  print(df.to_string())
  print("\n")


Section: COMPANY
Empty DataFrame
Columns: []
Index: []


Section: LATEST
Empty DataFrame
Columns: []
Index: []


Section: CURRENT
  0  1  2  3  4  5  6  7  8  9  10
0                                 


Section: META
Empty DataFrame
Columns: []
Index: []


Section: Number
Empty DataFrame
Columns: []
Index: []


Section: Face
  0  1  2  3  4  5  6  7  8  9  10
0                                 
1                                 
2                                 
3                                 
4                                 
5                                 
6                                 


Section: Current
Empty DataFrame
Columns: []
Index: []


Section: Market
  0  1  2  3  4  5  6  7  8  9  10
0                                 
1                                 
2                                 
3                                 
4                                 


Section: PROFIT
Empty DataFrame
Columns: []
Index: []


Section: Report
Empty DataFrame
Columns: []
Index: 

In [2]:
import pandas as pd

# Read the CSV file
file_path = 'TCS.csv'
df = pd.read_csv(file_path, header=None)

# Function to identify and extract sections
def extract_section(df, start_indicator, end_indicator=None):
    start_idx = df[df[0] == start_indicator].index[0]
    if end_indicator:
        try:
            end_idx = df[df[0] == end_indicator].index[0]
        except IndexError:
            end_idx = len(df)
    else:
        end_idx = len(df)
    return df.iloc[start_idx:end_idx].reset_index(drop=True)

# Extract different sections
meta_section = extract_section(df, ' META ')
profit_loss_section = extract_section(df, ' PROFIT & LOSS ', ' Quarters ')
quarters_section = extract_section(df, ' Quarters ', ' BALANCE SHEET ')
balance_sheet_section = extract_section(df, ' BALANCE SHEET ', ' CASH FLOW: ')
cash_flow_section = extract_section(df, ' CASH FLOW: ', ' PRICE: ')
price_section = extract_section(df, ' PRICE: ', ' DERIVED: ')
derived_section = extract_section(df, ' DERIVED: ')

# Clean and convert sections to dataframes
def clean_section(section_df):
    section_df.columns = section_df.iloc[0]
    section_df = section_df[1:].reset_index(drop=True)
    return section_df

meta_df = clean_section(meta_section)
profit_loss_df = clean_section(profit_loss_section)
quarters_df = clean_section(quarters_section)
balance_sheet_df = clean_section(balance_sheet_section)
cash_flow_df = clean_section(cash_flow_section)
price_df = clean_section(price_section)
derived_df = clean_section(derived_section)

# Save dataframes to CSV or other formats if needed
meta_df.to_csv('meta.csv', index=False)
profit_loss_df.to_csv('profit_loss.csv', index=False)
quarters_df.to_csv('quarters.csv', index=False)
balance_sheet_df.to_csv('balance_sheet.csv', index=False)
cash_flow_df.to_csv('cash_flow.csv', index=False)
price_df.to_csv('price.csv', index=False)
derived_df.to_csv('derived.csv', index=False)

# Optionally, you can return the dataframes for further use
dataframes = {
    "meta": meta_df,
    "profit_loss": profit_loss_df,
    "quarters": quarters_df,
    "balance_sheet": balance_sheet_df,
    "cash_flow": cash_flow_df,
    "price": price_df,
    "derived": derived_df
}

# Example usage: print the meta dataframe
print(dataframes["meta"])


0                            META          NaN       NaN       NaN       NaN  \
0                Number of shares      361.81        NaN       NaN       NaN   
1                      Face Value            1       NaN       NaN       NaN   
2                   Current Price      3851.45       NaN       NaN       NaN   
3           Market Capitalization   1393488.32       NaN       NaN       NaN   
4                              NaN         NaN       NaN       NaN       NaN   
..                             ...         ...       ...       ...       ...   
83                             NaN         NaN       NaN       NaN       NaN   
84                         PRICE:      1276.98   1260.15    1215.9   1424.58   
85                             NaN         NaN       NaN       NaN       NaN   
86                       DERIVED:          NaN       NaN       NaN       NaN   
87   Adjusted Equity Shares in Cr      391.75    394.09    394.09    382.86    

0        NaN       NaN       NaN       

In [6]:
dataframes["balance_sheet"]

Unnamed: 0,BALANCE SHEET,NaN,NaN.1,NaN.2,NaN.3,NaN.4,NaN.5,NaN.6,NaN.7,NaN.8,NaN.9
0,Report Date,Mar-15,Mar-16,Mar-17,Mar-18,Mar-19,Mar-20,Mar-21,Mar-22,Mar-23,Mar-24
1,Equity Share Capital,195.87,197,197,191,375,375,370,366,366,362
2,Reserves,50438.89,70875,86017,84937,89071,83751,86063,88773,90058,90127
3,Borrowings,357.7,245,289,247,62,8174,7795,7818,7688,8021
4,Other Liabilities,22325.46,16974,15830,19751,24393,27820,35764,43967,44747,46962
5,Total,73317.92,88291,102333,105126,113901,120120,129992,140924,142859,145472
6,Net Block,11638.17,11774,11701,11973,12290,20928,21021,21298,20515,19604
7,Capital Work in Progress,2766.37,1670,1541,1278,963,906,926,1205,1234,1564
8,Investments,1661.78,22822,41980,36008,29330,26356,29373,30485,37163,31762
9,Other Assets,57251.6,52025,47111,55867,71318,71930,78672,87936,83947,92542
