In [1]:
import pandas as pd
import os

def _quote_identifier(name):
    """Return ``name`` as a SQL identifier, double-quoting it only when needed.

    Names made solely of ASCII letters, digits and underscores (and not
    starting with a digit) are returned unchanged. Anything else (spaces,
    hyphens, a leading digit, non-ASCII text, the empty string) is wrapped in
    double quotes with embedded double quotes doubled.

    Reserved words are not detected, so a column literally named ``select``
    is still emitted unquoted.
    """
    text = str(name)
    is_plain = text.isidentifier() and all(ord(ch) < 128 for ch in text)
    if is_plain:
        return text
    return '"' + text.replace('"', '""') + '"'


def _sql_type_for_dtype(dtype):
    """Map a pandas dtype to the SQL-like type name used in the schema."""
    dtype_name = str(dtype)
    # The first matching substring wins, so the order of this table is the
    # precedence of the mapping: int, float, datetime, bool.
    for marker, sql_type in (
        ('int', 'INTEGER'),
        ('float', 'FLOAT'),
        ('datetime', 'DATETIME'),
        ('bool', 'BOOLEAN'),
    ):
        if marker in dtype_name:
            return sql_type
    return 'VARCHAR'


def analyze_csv_schema(csv_file):
    """Infer a SQL-like ``CREATE TABLE`` statement from a CSV file.

    The file is loaded with ``pd.read_csv`` using its default type inference.
    Each column becomes one line of the form ``<name> <type> <nullability>``:

    * ``<type>`` is INTEGER, FLOAT, DATETIME or BOOLEAN when the pandas dtype
      name contains ``int``, ``float``, ``datetime`` or ``bool`` respectively,
      otherwise VARCHAR.
    * ``<nullability>`` is ``NULL`` if the column contains at least one
      missing value, else ``NOT NULL``.

    The table name is the file's base name without extension, upper-cased.
    Column and table names that are not plain identifiers are double-quoted
    so that the generated statement stays syntactically valid.

    Parameters
    ----------
    csv_file : str or path-like
        Path of the CSV file to analyze.

    Returns
    -------
    str
        The ``CREATE TABLE`` statement on success. On any failure (missing
        file, unparsable CSV, ...) no exception is raised; instead a message
        starting with ``"Error analyzing schema: "`` is returned, so callers
        that need to tell the two apart must check for that prefix.
    """
    try:
        df = pd.read_csv(csv_file)

        column_defs = []
        for col_name, column in df.items():
            sql_type = _sql_type_for_dtype(column.dtype)
            nullable = 'NULL' if column.isnull().any() else 'NOT NULL'
            column_defs.append(
                f"{_quote_identifier(col_name)} {sql_type} {nullable}"
            )

        # Table name: file name without directory or extension, upper-cased.
        base_name = os.path.splitext(os.path.basename(csv_file))[0].upper()
        table_name = _quote_identifier(base_name)

        return (
            f"CREATE TABLE {table_name} (\n    "
            + ",\n    ".join(column_defs)
            + "\n);"
        )

    except Exception as e:
        # Best-effort contract kept from the original: report the failure as
        # text instead of propagating the exception.
        return f"Error analyzing schema: {str(e)}"

In [2]:
# Example: infer the schema of the forecast sheet CSV and show it.
# The path is relative, so it is resolved against the current working directory.
csv_path = 'final_forecast_sheet.csv'
schema = analyze_csv_schema(csv_path)
print(schema)

CREATE TABLE FINAL_FORECAST_SHEET (
    id INTEGER NOT NULL,
    DC_FC_Assets_Type VARCHAR NULL,
    DC_FC_Assets_Name VARCHAR NOT NULL,
    SQL_FC_Account_ID VARCHAR NOT NULL,
    SQL_Heading_Sequence INTEGER NOT NULL,
    SQL_Sequence FLOAT NOT NULL,
    SQL_Account_Name_Code VARCHAR NOT NULL,
    SQL_Account_Name VARCHAR NOT NULL,
    SQL_Account_Category_Order_Code VARCHAR NOT NULL,
    SQL_Account_Category_Order VARCHAR NOT NULL,
    SUB_Account_Category_Order_Code VARCHAR NULL,
    SUB_Account_Category_Order VARCHAR NULL,
    SQL_Account_Group_Name_Code FLOAT NULL,
    SQL_Account_Group_Name FLOAT NULL,
    Accountnumber_ID VARCHAR NOT NULL,
    January FLOAT NOT NULL,
    February FLOAT NOT NULL,
    March FLOAT NOT NULL,
    April FLOAT NOT NULL,
    May FLOAT NOT NULL,
    June FLOAT NOT NULL,
    July FLOAT NULL,
    August FLOAT NULL,
    September FLOAT NULL,
    October FLOAT NULL,
    November FLOAT NULL,
    December FLOAT NULL,
    Total FLOAT NULL,
    Account_Year INT