## testing file imports

In [5]:
import pandas as pd
from sqlalchemy import create_engine
import os

# Define your PostgreSQL connection parameters
db_user = "inesschwartz"
db_password = ""  # leave empty if you're not using one
db_host = "localhost"
db_port = "5432"
db_name = "solos_db"

# Create SQLAlchemy engine
engine = create_engine(f"postgresql+psycopg2://{db_user}@{db_host}:{db_port}/{db_name}")

# Path to your CSV files folder
csv_folder = "/Users/inesschwartz/GreenDataScience/Thesis/tables_clean"  # <-- change this to the actual path

# Loop over all CSVs and import each one
for filename in os.listdir(csv_folder):
    if filename.endswith(".csv"):
        filepath = os.path.join(csv_folder, filename)
        table_name = os.path.splitext(filename)[0]  # use filename (without .csv) as table name

        print(f"Importing {filename} as table '{table_name}'...")

        # Read the CSV
        df = pd.read_csv(filepath)

        # Upload to PostgreSQL
        df.to_sql(table_name, engine, if_exists="replace", index=False)

        print(f"✔️  Finished importing '{table_name}'")

print("✅ All CSVs imported successfully.")


Importing soil_type_clean.csv as table 'soil_type_clean'...
✔️  Finished importing 'soil_type_clean'
Importing analyses_clean.csv as table 'analyses_clean'...
✔️  Finished importing 'analyses_clean'
Importing geology_mapping.csv as table 'geology_mapping'...
✔️  Finished importing 'geology_mapping'
Importing topo_feat_clean.csv as table 'topo_feat_clean'...
✔️  Finished importing 'topo_feat_clean'
Importing lithology1954_mapping.csv as table 'lithology1954_mapping'...
✔️  Finished importing 'lithology1954_mapping'
Importing profile_record_clean.csv as table 'profile_record_clean'...
✔️  Finished importing 'profile_record_clean'
Importing morphology_horizon_clean.csv as table 'morphology_horizon_clean'...


  df = pd.read_csv(filepath)


✔️  Finished importing 'morphology_horizon_clean'
Importing lithology_mapping.csv as table 'lithology_mapping'...
✔️  Finished importing 'lithology_mapping'
Importing climate_feat_clean.csv as table 'climate_feat_clean'...
✔️  Finished importing 'climate_feat_clean'
Importing site_info_clean.csv as table 'site_info_clean'...
✔️  Finished importing 'site_info_clean'
Importing samples_clean.csv as table 'samples_clean'...
✔️  Finished importing 'samples_clean'
Importing districts_clean.csv as table 'districts_clean'...
✔️  Finished importing 'districts_clean'
Importing geo_feat_clean.csv as table 'geo_feat_clean'...
✔️  Finished importing 'geo_feat_clean'
✅ All CSVs imported successfully.


Proposed SQL Schema

In [None]:
-- PROFILE TABLE
CREATE TABLE profile (
    profile_record_id VARCHAR PRIMARY KEY,
    profile_id VARCHAR,
    site_info_id INTEGER REFERENCES site_info(site_info_id),
    sample_id INTEGER REFERENCES samples(sample_id),
    soil_type_id INTEGER REFERENCES soil_type(soil_type_id)
);

-- SAMPLES TABLE
CREATE TABLE samples (
    sample_id INTEGER PRIMARY KEY,
    site_info_id INTEGER REFERENCES site_info(site_info_id),
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    shelf INTEGER,
    room VARCHAR,
    year INTEGER,
    horizon_id VARCHAR REFERENCES morphology_horizon(horizon_id)
);

-- DISTRICTS TABLE
CREATE TABLE districts (
    district_id INT PRIMARY KEY,
    sample_id INTEGER REFERENCES samples(sample_id),
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    district_name VARCHAR
);

-- CLIMATE TABLE
CREATE TABLE climate ( 
    climate_id INT PRIMARY KEY,
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    mean_annual_temp VARCHAR,
    mean_annual_precip VARCHAR, 
    koppen_climate VARCHAR,
    thornthwaite_climate VARCHAR,
    hydric_regime VARCHAR,
    thermal_regime VARCHAR
);

-- ANALYSES TABLE
CREATE TABLE analyses (
    lab_sample_id SERIAL PRIMARY KEY,
    sample_id INTEGER REFERENCES samples(sample_id),
    EG INT,
    thick_clay FLOAT,
    fine_clay FLOAT,
    silt FLOAT,
    clay FLOAT,
    Eq_Hum FLOAT,
    atm_1_3 FLOAT,
    atm_15 FLOAT,
    CACO4 FLOAT,
    gypsum FLOAT,
    free_iron FLOAT,
    organic_carbon FLOAT,
    total_N FLOAT,
    P205 FLOAT,
    organic_material FLOAT,
    pH_H2O FLOAT,
    pH_KCL FLOAT,
    Ca FLOAT,
    Mg FLOAT,
    Na FLOAT,
    K FLOAT,
    exchangable_bases_sum FLOAT,	
    CEC FLOAT,
    V FLOAT,
    conductivity FLOAT,
    soluble_sodium FLOAT,
    Min_lt_0002 FLOAT,
    Min_005_002 FLOAT,
    Min_02_005 FLOAT,
    Min_2_02 FLOAT,
    field_sample_code FLOAT,	
    Depth FLOAT,
    Al FLOAT,
    Si FLOAT,
    P FLOAT,
    S FLOAT,
    Cl FLOAT,
    Ti FLOAT,
    Cr FLOAT,
    Mn FLOAT,
    Fe FLOAT,
    Co FLOAT,
    Ni FLOAT,
    Cu FLOAT,
    Zn FLOAT,
    As FLOAT,
    Se FLOAT,
    Rb FLOAT,
    Sr FLOAT,
    Zr FLOAT,
    Nb FLOAT,
    Mo FLOAT,
    Cd FLOAT,
    Sn FLOAT,
    Sb FLOAT,
    Ba FLOAT,
    Ta FLOAT,
    W FLOAT,
    Pt FLOAT,
    Au FLOAT,
    Hg FLOAT,
    Tl FLOAT,
    Pb FLOAT,
    Bi FLOAT,
    Th FLOAT,
    U FLOAT
);

-- GEOLOGICAL FEATURES TABLE
CREATE TABLE geo_feat (
    geo_features_id SERIAL PRIMARY KEY,
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    geology_id VARCHAR,
    lithology_id VARCHAR,
    lithology_1954_id VARCHAR
);

-- MORPHOLOGY HORIZON TABLE
CREATE TABLE morphology_horizon (
    horizon_id VARCHAR PRIMARY KEY,
    sample_id INTEGER REFERENCES samples(sample_id),
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    horizon_layer VARCHAR,
    upper_depth INTEGER,
    lower_depth INTEGER,
    moisture_degree VARCHAR,
    root_quantity VARCHAR,
    root_diameter VARCHAR,
    texture VARCHAR,
    structure_type VARCHAR,
    structure_class VARCHAR,
    structure_degree VARCHAR,
    pore_diameter VARCHAR,
    pore_quantity VARCHAR,
    pore_shape VARCHAR,
    dry_color_name VARCHAR,
    dry_hue VARCHAR,
    dry_value INTEGER,
    dry_chroma FLOAT,
    moist_color_name VARCHAR,
    moist_hue VARCHAR,
    moist_value INTEGER,
    moist_chroma FLOAT,
    compaction VARCHAR,
    durability VARCHAR
);

-- SITE INFORMATION TABLE
CREATE TABLE site_info (
    site_info_id INTEGER PRIMARY KEY,
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    X_coord FLOAT,
    Y_coord FLOAT,
    sampling_date DATE,
    districts_id INTEGER REFERENCES districts(district_id),
    climate_id INTEGER REFERENCES climate(climate_id),
    topo_features_id INTEGER REFERENCES topo_feat(topo_features_id),
    geo_features_id INTEGER REFERENCES geo_feat(geo_features_id)
);

-- SOIL TYPE TABLE
CREATE TABLE soil_type (
    soil_type_id INTEGER PRIMARY KEY,
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    grouping VARCHAR,
    CEP_GR VARCHAR,
    CEP_NAME VARCHAR,
    FAO VARCHAR
);

-- TOPOGRAPHIC FEATURES TABLE
CREATE TABLE topo_feat (
    topo_features_id INTEGER PRIMARY KEY,
    profile_record_id VARCHAR REFERENCES profile(profile_record_id),
    slope_code VARCHAR,
    altitude INTEGER,
    aspect VARCHAR,
    land_surface_temp FLOAT,
    dem_elevation FLOAT
);
