# General Plant Description Table Assembly and Database Import

Name: Zihan, Klarissa

## Step 1 - Imports and Path Setup

In [1]:
import os
import json
import pandas as pd
from glob import glob

# Paths (relative to the notebook's working directory)
# Directory that Step 3 searches for plant_species_details_*.json files
input_dir = "01_raw_data/01_species_details"
# CSV written in Step 4; the LOAD DATA statement in Step 5 names this same file
output_path = "02_wrangled_data/Table02_GeneralPlantDescriptionTable.csv"

## Step 2 - Mapping and Flatten Function

In [2]:
# Cycle label mapping: raw "cycle" value -> label stored in the table.
# Values not listed here are passed through unchanged.
cycle_map = {
    "Perennial": "Every year",
    "Annual": "Once a year",
    "Biennial": "Every 2 years"
}


def _flag(data, *keys):
    """Return True when at least one of `keys` holds a truthy value in `data`.

    Always yields a real bool, so missing keys, JSON nulls and 0/1 integers
    all serialize as True/False in the CSV.
    """
    return any(bool(data.get(key)) for key in keys)


def _json_list(data, key):
    """Serialize `data[key]` as a JSON array string; missing or null becomes "[]"."""
    return json.dumps(data.get(key) or [], ensure_ascii=False)  # JSON text to fit MySQL Workbench


# Flatten general plant record
def flatten_general_description(data, max_general_id=3000):
    """Flatten one parsed species-detail record into a general-description row.

    Parameters
    ----------
    data : dict
        Parsed JSON object for a single plant.
    max_general_id : int, optional
        Largest ``id`` that is kept. Records with a larger id are skipped
        (they are the threatened plants). Defaults to 3000.

    Returns
    -------
    dict or None
        Row keyed by the table's column names, or ``None`` when the record
        has no ``id`` or its id exceeds ``max_general_id``.
    """
    plant_id = data.get("id")
    # A record without an id cannot be keyed; skip it instead of raising
    # TypeError on the `None > int` comparison.
    if plant_id is None or plant_id > max_general_id:
        return None  # Skip threatened plants

    cycle = data.get("cycle")
    return {
        "general_plant_id": plant_id,
        "if_edible": _flag(data, "edible_fruit", "edible_leaf", "cuisine"),
        "if_indoors": _flag(data, "indoor"),
        "if_medicinal": _flag(data, "medicinal"),
        "if_poisonous": _flag(data, "poisonous_to_humans", "poisonous_to_pets"),
        "if_fruits": _flag(data, "fruits"),
        "if_flowers": _flag(data, "flowers"),
        "plant_type": data.get("type"),
        "plant_cycle": cycle_map.get(cycle, cycle),
        "attracts": _json_list(data, "attracts"),
        "propagation": _json_list(data, "propagation"),
        "description": data.get("description")
    }

## Step 3 - Load and Flatten JSON Files

In [3]:
# Read every per-species JSON file and collect the rows the flattener keeps
json_files = glob(os.path.join(input_dir, "plant_species_details_*.json"))
flattened_data = []

for json_path in json_files:
    # Parse the file first; the handle is only needed for the read itself
    with open(json_path, "r", encoding="utf-8") as handle:
        data = json.load(handle)

    record = flatten_general_description(data)
    if not record:
        continue  # the flattener returned nothing for this plant
    flattened_data.append(record)

## Step 4 - Build DataFrame and Export CSV

In [4]:
# Column order of the exported CSV; it matches the column list used by the
# LOAD DATA statement in Step 5.
ordered_cols = [
    "general_plant_id", "if_edible", "if_indoors", "if_medicinal", "if_poisonous",
    "if_fruits", "if_flowers", "plant_type", "plant_cycle",
    "attracts", "propagation", "description"
]

# Create DataFrame. Passing `columns=` fixes the column order and keeps every
# column present even when no record was loaded, so the steps below cannot
# fail with KeyError on an empty input.
df = pd.DataFrame(flattened_data, columns=ordered_cols)

# Coerce ids to nullable integers first, then sort on the numeric values
df["general_plant_id"] = pd.to_numeric(df["general_plant_id"], errors="coerce").astype("Int64")
df = df.sort_values(by="general_plant_id").reset_index(drop=True)

os.makedirs(os.path.dirname(output_path), exist_ok=True)
# The LOAD DATA statement in Step 5 declares LINES TERMINATED BY '\r\n'.
# to_csv otherwise uses the OS default line ending, which is '\n' outside
# Windows, so the terminator is pinned here (`lineterminator` is the
# pandas >= 1.5 spelling of this argument).
df.to_csv(output_path, index=False, lineterminator="\r\n")

## Step 5 - Import General Plant Description Table into MySQL

In [5]:
import getpass

import mysql.connector
from mysql.connector import Error

# Database connection configuration.
# The password is deliberately NOT stored in the notebook: it is read from the
# PLANTX_DB_PASSWORD environment variable, or prompted for when that is unset.
# Host, user and database can be overridden through the environment as well.
db_config = {
    'host': os.environ.get('PLANTX_DB_HOST', 'database-plantx.cqz06uycysiz.us-east-1.rds.amazonaws.com'),
    'user': os.environ.get('PLANTX_DB_USER', 'zihan'),
    'password': os.environ.get('PLANTX_DB_PASSWORD') or getpass.getpass('MySQL password: '),
    'database': os.environ.get('PLANTX_DB_NAME', 'FIT5120_PlantX_Database'),
    'allow_local_infile': True,
    'use_pure': True  # Use pure Python implementation
}

# Bind both names up front so the `finally` block is safe even when
# connect() itself raises (otherwise they would be unbound or stale there).
connection = None
cursor = None

try:
    # Establish connection
    connection = mysql.connector.connect(**db_config)

    if connection.is_connected():
        print("Successfully connected to MySQL server")

        # Create cursor
        cursor = connection.cursor()

        # Construct LOAD DATA LOCAL INFILE command.
        # The file name must match `output_path` from Step 1.
        # '\\r\\n' reaches MySQL as the escape sequence \r\n, so the CSV is
        # expected to use CRLF line endings.
        # NOTE(review): LOAD DATA appends rows; re-running this cell may insert
        # the same rows again unless the table enforces a unique key - confirm.
        load_data_query = """
            LOAD DATA LOCAL INFILE '02_wrangled_data/Table02_GeneralPlantDescriptionTable.csv'
            INTO TABLE Table02_GeneralPlantDescriptionTable
            CHARACTER SET utf8mb4
            FIELDS TERMINATED BY ','
            OPTIONALLY ENCLOSED BY '"'
            LINES TERMINATED BY '\\r\\n'
            IGNORE 1 LINES
            (
                general_plant_id, if_edible, if_indoors, if_medicinal, if_poisonous,
                if_fruits, if_flowers, plant_type, plant_cycle, attracts, propagation,
                description
            );
        """

        # Execute command
        cursor.execute(load_data_query)
        connection.commit()  # Commit transaction

        print(f"Data import successful! {cursor.rowcount} rows affected.")

except Error as e:
    print(f"Error occurred during execution: {e}")

finally:
    # Close connection (only what was actually opened)
    if connection is not None and connection.is_connected():
        if cursor is not None:
            cursor.close()
        connection.close()
        print("MySQL connection closed.")

Successfully connected to MySQL server
Data import successful! 891 rows affected.
MySQL connection closed.


## Step 6 - Verify Imported Rows and Preview

In [6]:
# Verify the import over a new connection.
# Relies on `db_config`, `mysql.connector` and `Error` from Step 5.

# Bind both names up front so the `finally` block never touches an unbound
# (or stale, already-closed) object when connect() or cursor() raises.
connection = None
cursor = None

try:
    connection = mysql.connector.connect(**db_config)
    cursor = connection.cursor()

    cursor.execute("SELECT COUNT(*) FROM Table02_GeneralPlantDescriptionTable")
    row_count = cursor.fetchone()[0]
    print(f"The table currently contains {row_count} rows")

    # Preview first few rows; ORDER BY makes the preview deterministic
    cursor.execute(
        "SELECT * FROM Table02_GeneralPlantDescriptionTable "
        "ORDER BY general_plant_id LIMIT 5"
    )
    rows = cursor.fetchall()
    for row in rows:
        print(row)

except Error as e:
    print(f"Error occurred during query: {e}")
finally:
    if connection is not None and connection.is_connected():
        if cursor is not None:
            cursor.close()
        connection.close()

The table currently contains 1484 rows
(1, 'False', 'False', 'True', 'False', 'False', 'False', 'tree', 'Every year', '[]', '["Cutting", "Grafting Propagation", "Layering Propagation", "Seed Propagation", "Air Layering Propagation", "Tissue Culture"]', 'European Silver Fir (Abies alba) is an amazing coniferous species native to mountainous regions of central Europe and the Balkans. It is an evergreen tree with a narrow, pyramidal shape and long, soft needles. Its bark is scaly grey-brown and its branches are highly ornamental due to its conical-shaped silver-tinged needles. It is pruned for use as an ornamental evergreen hedging and screening plant, and is also popular for use as a Christmas tree. Young trees grow quickly and have strong, flexible branches which makes them perfect for use as windbreaks. The European Silver Fir is an impressive species, making it ideal for gardens and public spaces.')
(2, 'False', 'False', 'False', 'False', 'False', 'False', 'tree', 'Every year', '[]', 