In [1]:
import pandas as pd
import json
import os
from glob import glob

def json_files_to_dataframe(directory_path):
    """
    Convert each JSON file into one row in DataFrame
    """
    # Get all JSON files in directory
    json_files = glob(os.path.join(directory_path, '*.json'))
    
    rows = []
    
    for file_path in json_files:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                
            # Add filename for reference
            if isinstance(data, dict):
                data['filename'] = os.path.basename(file_path)
            
            rows.append(data)
            
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            # Add empty row with filename to maintain order
            rows.append({'filename': os.path.basename(file_path), 'error': str(e)})
    
    # Create DataFrame where each file is one row
    df = pd.DataFrame(rows)
    return df

# Usage
df = json_files_to_dataframe('H:\Python\Exception_Handling\Feature Engineering\json files')
print(f"Created DataFrame with {len(df)} rows (files)")
df.head()

  df = json_files_to_dataframe('H:\Python\Exception_Handling\Feature Engineering\json files')


Created DataFrame with 19 rows (files)


Unnamed: 0,ticket_id,priority,category,status,created_date,resolved_date,assigned_to,user,subject,description,resolution,resolution_steps,time_spent_minutes,tags,impact,root_cause,prevention_measures,filename
0,TKT-002,CRITICAL,Security,Resolved,2024-01-16T11:20:00Z,2024-01-17T16:30:00Z,Sarah Davis,"{'name': 'Bob Wilson', 'department': 'Marketin...",Suspected Security Breach,Security monitoring detected unauthorized acce...,Blocked malicious IPs and implemented addition...,[Analyzed security logs and identified attack ...,1270,"[security, breach, firewall, authentication, m...",Critical - Potential data exposure,Weak password policies and insufficient monito...,Enhanced security policies and real-time monit...,ticket_002_security_breach.json
1,TKT-003,HIGH,Network,Resolved,2024-01-17T08:15:00Z,2024-01-17T12:30:00Z,Mike Chen,"{'name': 'Carlos Rodriguez', 'department': 'HR...",Network Outage - Building A,Complete network outage affecting entire Build...,Replaced failed network switch and restored co...,[Identified failed core network switch in Buil...,255,"[network, outage, switch, connectivity, infras...",High - Entire building offline,Hardware failure in core network switch,Implement redundant switches and monitoring,ticket_003_network_outage.json
2,TKT-004,MEDIUM,Email,Resolved,2024-01-18T14:30:00Z,2024-01-18T17:45:00Z,Lisa Brown,"{'name': 'David Kim', 'department': 'IT', 'ema...",Email Server Overwhelmed by Spam,Email server is receiving massive amounts of s...,Updated spam filters and blocked malicious sen...,[Analyzed spam patterns and identified source ...,195,"[email, spam, performance, filtering, security]",Medium - Email delays and performance issues,Insufficient spam filtering and outdated rules,Regular spam filter updates and monitoring,ticket_004_email_spam.json
3,TKT-005,MEDIUM,Software,Resolved,2024-01-19T10:45:00Z,2024-01-19T13:20:00Z,Tom Wilson,"{'name': 'Emma Thompson', 'department': 'Sales...",Software License Compliance Issue,Software audit revealed that company is using ...,Purchased additional licenses and implemented ...,[Conducted comprehensive software audit across...,155,"[license, compliance, audit, software, legal]",Medium - Legal compliance risk,Insufficient license tracking and management,Implement license management system and regula...,ticket_005_software_license.json
4,TKT-006,HIGH,Backup,Resolved,2024-01-20T16:00:00Z,2024-01-21T09:30:00Z,Alex Johnson,"{'name': 'Jennifer Lee', 'department': 'Operat...",Backup Data Corruption,Critical backup files are corrupted and cannot...,Restored from secondary backup location and im...,[Identified corrupted backup files and extent ...,1050,"[backup, corruption, disaster_recovery, data_i...",High - Data recovery risk,Insufficient backup verification and monitoring,Implement automated backup verification and mu...,ticket_006_backup_corruption.json


In [2]:
df.to_csv('output.csv', index=False)
print("CSV file saved as 'output.csv'")

CSV file saved as 'output.csv'


In [3]:
# Check if file exists
if os.path.exists('output.csv'):
    print("CSV file created successfully")
    
    # Read it back to verify
    df_check = pd.read_csv('output.csv')
    print(f"Verified CSV has {len(df_check)} rows")
else:
    print("CSV file was not created")

CSV file created successfully
Verified CSV has 19 rows


In [4]:
# First, implement the process_file function
def process_file(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        result = {
            'filename': os.path.basename(file_path),
            'file_path': file_path,
            'file_size': os.path.getsize(file_path),
            'data': data
        }
        return result
        
    except Exception as e:
        return {
            'filename': os.path.basename(file_path),
            'error': str(e),
            'data': None
        }

# Then modify main function
def json_files_to_dataframe_improved(directory_path):
    json_files = glob(os.path.join(directory_path, '*.json'))
    rows = [process_file(f) for f in json_files]
    return pd.DataFrame(rows)

df = json_files_to_dataframe_improved('H:\Python\Exception_Handling\Feature Engineering\json files')

# Analysis requires unpacking:
names = [row['data']['name'] for row in df['data'] if row and 'name' in row]
large_files = df[df['file_size'] > 1000]['filename']  # Rich metadata!

df.head()

  df = json_files_to_dataframe_improved('H:\Python\Exception_Handling\Feature Engineering\json files')


Unnamed: 0,filename,file_path,file_size,data
0,ticket_002_security_breach.json,H:\Python\Exception_Handling\Feature Engineeri...,1357,"{'ticket_id': 'TKT-002', 'priority': 'CRITICAL..."
1,ticket_003_network_outage.json,H:\Python\Exception_Handling\Feature Engineeri...,1307,"{'ticket_id': 'TKT-003', 'priority': 'HIGH', '..."
2,ticket_004_email_spam.json,H:\Python\Exception_Handling\Feature Engineeri...,1322,"{'ticket_id': 'TKT-004', 'priority': 'MEDIUM',..."
3,ticket_005_software_license.json,H:\Python\Exception_Handling\Feature Engineeri...,1362,"{'ticket_id': 'TKT-005', 'priority': 'MEDIUM',..."
4,ticket_006_backup_corruption.json,H:\Python\Exception_Handling\Feature Engineeri...,1380,"{'ticket_id': 'TKT-006', 'priority': 'HIGH', '..."


In [5]:
df.to_csv('output_improved.csv', index=False)