In [None]:
import os
import csv

# Root directory of the downloaded data
base_folder = '../Date wise data'

# CSV file that receives the extracted rows
output_csv = 'pso_data.csv'

# Month folder names in calendar order (Jan through Dec)
months_order = 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

# Function to extract the rows for one symbol (PSO by default) from a text file
def extract_pso_data(file_path, date_str, symbol='PSO'):
    """Collect every line of a text file that mentions ``symbol``.

    Args:
        file_path: Path of the UTF-8 encoded text file to scan.
        date_str: Label placed at the front of every returned row.
        symbol: Text a line must contain to be kept; defaults to ``'PSO'``.

    Returns:
        A list of rows, each ``[date_str, field_1, field_2, ...]`` where the
        fields are the comma-separated parts of a matching line. The list is
        empty when no line matches.
    """
    pso_data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # NOTE(review): this is a substring test, so a line that merely
            # contains the symbol text inside another field also matches.
            if symbol in line:
                # Drop the line terminator before splitting; otherwise the
                # last field keeps its '\n' and ends up in the CSV as a
                # quoted multi-line cell.
                data_parts = line.rstrip('\r\n').split(',')
                pso_data.append([date_str] + data_parts)
    return pso_data

# Sort key for folder names: all-numeric names come first in numeric order
# (so '2' sorts before '10'), followed by any other names alphabetically.
def _folder_sort_key(name):
    return (0, int(name), name) if name.isdecimal() else (1, 0, name)


# Create the output CSV file. The inputs are read as UTF-8, so the output is
# written as UTF-8 too instead of relying on the platform default encoding.
with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)

    # Column labels based on the provided sample data structure
    csv_writer.writerow([
        'Date',         # The date built from the folder structure
        'Type',         # 'REG'
        'Symbol',       # 'PSO'
        'Company Name', # 'Pakistan State Oil Co Ltd.'
        'Volume',       # '170'
        'High Price',   # '463.8375'
        'Low Price',    # '419.6625'
        'Closing Price' # '441.75'
    ])

    # os.listdir() returns names in arbitrary order, so every level is sorted
    # explicitly to keep the output chronological and reproducible.
    # isdecimal() guarantees that int() accepts the folder name.
    year_folders = sorted(
        (
            name for name in os.listdir(base_folder)
            if name.isdecimal()
            and int(name) >= 2014
            and os.path.isdir(os.path.join(base_folder, name))
        ),
        key=int,
    )

    # Iterate through the year folders (2014 onwards), oldest first
    for year_folder in year_folders:
        year_folder_path = os.path.join(base_folder, year_folder)
        print(f"Processing year folder: {year_folder}")  # Debugging output

        # Process months in predefined order from Jan to Dec
        for month_folder in months_order:
            month_folder_path = os.path.join(year_folder_path, month_folder)
            if not os.path.isdir(month_folder_path):
                continue
            print(f"  Processing month folder: {month_folder}")  # Debugging output

            for day_folder in sorted(os.listdir(month_folder_path), key=_folder_sort_key):
                day_folder_path = os.path.join(month_folder_path, day_folder)
                if not os.path.isdir(day_folder_path):
                    continue
                print(f"    Processing day folder: {day_folder}")  # Debugging output

                for file_name in sorted(os.listdir(day_folder_path)):
                    if not file_name.endswith('.txt'):
                        continue
                    file_path = os.path.join(day_folder_path, file_name)
                    print(f"      Processing file: {file_path}")  # Debugging output

                    # The date label is built as year-month-day from the folder names
                    date_str = f"{year_folder}-{month_folder}-{day_folder}"

                    # Extract the PSO rows from the file and write them to the CSV
                    csv_writer.writerows(extract_pso_data(file_path, date_str))

print("PSO data extraction completed and saved to", output_csv)