In [None]:
"""
This Python application extracts data from a CSV file and builds a separate pandas DataFrame for each
section introduced by a keyword row such as 'Host Details', 'Panelist Details', or 'Attendee Details'.
The program reads the CSV file row by row and locates the target rows that contain the keywords.
Each target row marks the start of a section: the rows that follow it, up to the next target row,
are collected into that section's DataFrame. Once all the target rows have been processed, the result
is a list of DataFrames, one for each of the target rows in the CSV file. This application
can be useful for data processing and analysis tasks that require separate DataFrames for different
types of data, such as attendance reports or meeting logs.
"""

In [68]:
import csv
import os

import pandas as pd

# Section headings to look for in the CSV, and the snake_case name used for each section
key_words = ['Host Details', 'Panelist Details', 'Attendee Details']
df_names = [heading.lower().replace(' ', '_') for heading in key_words]

# Folder that holds the CSV files
folder_path = 'data_files/csv/'

# Names of all entries in that folder whose name ends with '.csv'
csv_files = list(filter(lambda name: name.endswith('.csv'), os.listdir(folder_path)))

In [69]:
def section_data_list(file_path, key_words):
    """Split a CSV file into sections that each begin at a keyword row.

    A row starts a section when one of its cells is exactly equal to a keyword.
    Each section runs from its keyword row (inclusive) up to the next keyword
    row found in the file (exclusive), or to the end of the file.

    Parameters
    ----------
    file_path : str or path-like
        Path of the CSV file to read.
    key_words : iterable of str
        Keywords that mark the first row of a section.

    Returns
    -------
    dict
        Maps each keyword, lower-cased and with spaces replaced by underscores,
        to the list of rows of its section (each row is a list of strings, the
        keyword row first). A keyword that does not occur in the file maps to
        an empty list.
    """
    key_words = list(key_words)

    # newline='' is what the csv module asks for, so that it handles line
    # endings (including ones embedded in quoted fields) itself.
    with open(file_path, 'r', newline='') as csvfile:
        all_rows = list(csv.reader(csvfile, delimiter=',', quotechar='"'))

    # Index of the first row in which each keyword appears as a whole cell.
    starts = {}
    for index, row in enumerate(all_rows):
        for word in key_words:
            if word not in starts and word in row:
                starts[word] = index

    # Section boundaries in file order. Pairing start/end positions by keyword
    # order instead breaks as soon as a keyword is missing, repeated, or the
    # sections appear in a different order than `key_words`.
    boundaries = sorted(set(starts.values()))

    variables = {}
    for word in key_words:
        name = word.replace(' ', '_').lower()
        if word not in starts:
            variables[name] = []
            continue

        start = starts[word]
        next_position = boundaries.index(start) + 1
        end = boundaries[next_position] if next_position < len(boundaries) else None
        variables[name] = all_rows[start:end]

    return variables

In [70]:
def extract_section_data(key, section_data_list):
    """Turn one parsed CSV section into a pandas DataFrame.

    Parameters
    ----------
    key : str
        Name of the section to extract; must be a key of ``section_data_list``.
    section_data_list : dict
        Maps section names to that section's rows (each row a list of cells).
        The first row of a section is skipped. Of the remaining non-blank rows,
        the first supplies the column names and the rest supply the records.

    Returns
    -------
    pandas.DataFrame
        The section's records. An empty DataFrame is returned when the section
        has no non-blank rows after its first row.

    Raises
    ------
    KeyError
        If ``key`` is not present in ``section_data_list``.
    """
    # Skip the section's first row and drop blank rows (empty lists). A blank
    # row would otherwise pull the minimum width down to 0, so every real row,
    # header included, would lose its last cell, and the blank row itself
    # would show up as an all-None record.
    rows = [row for row in section_data_list[key][1:] if row]

    # Nothing to build a header from: return an empty frame instead of
    # failing on min() of an empty sequence.
    if not rows:
        return pd.DataFrame()

    # Width of the narrowest row in the section
    min_width = min(len(row) for row in rows)

    # Rows wider than the narrowest one lose their last cell. Only one cell is
    # removed; rows that are still ragged afterwards are handed to pandas as-is.
    trimmed = [row[:-1] if len(row) > min_width else row for row in rows]

    header, records = trimmed[0], trimmed[1:]
    return pd.DataFrame(records, columns=header)

In [71]:
# Choose the CSV file to process. `file_path` was not assigned anywhere in the
# notebook, so this cell raised NameError on a fresh kernel.
# NOTE(review): the first CSV in sorted order is used here; confirm this is the intended file.
if not csv_files:
    raise FileNotFoundError(f'No CSV files found in {folder_path!r}')
file_path = os.path.join(folder_path, sorted(csv_files)[0])

# Store the parsed sections under their own name. Assigning the result back to
# `section_data_list` replaced the function with a dict, so the cell could not
# be run a second time.
sections = section_data_list(file_path, key_words)

# One DataFrame per section, in the same order as `df_names`
dfs = [extract_section_data(key, sections) for key in df_names]

In [73]:
# Save every DataFrame to its own workbook, named after its position in `dfs`
# (0output.xlsx, 1output.xlsx, ...), on a single sheet and without the index column.
for position, frame in enumerate(dfs):
    workbook_name = f'{position}output.xlsx'
    with pd.ExcelWriter(workbook_name) as writer:
        frame.to_excel(writer, sheet_name='Sheet1', index=False)