# General Plant Care Guide Assembly and Database Import

Name: Zihan, Klarissa

## Step 1 - Imports and Path Setup

In [1]:
import os
import json
import pandas as pd
from glob import glob

# Paths
# All three are relative paths, so they resolve against the kernel's working directory.
details_dir = "01_raw_data/01_species_details"  # globbed for plant_species_details_*.json
care_dir = "01_raw_data/02_care_guide"  # globbed for plant_species_care_guide_*.json
# Destination CSV; the LOAD DATA statement in Step 10 names this same file literally.
output_path = "02_wrangled_data/Table03_GeneralPlantCareGuideTable.csv"

## Step 2 - Cycle Mapping

In [2]:
# Cycle label -> display phrase.
# NOTE(review): no other cell visible in this notebook reads cycle_map — confirm it is still needed.
cycle_map = dict(
    Perennial="Every year",
    Annual="Once a year",
    Biennial="Every 2 years",
)

## Step 3 - Care Guide Parser

In [3]:
# Care guide parser
def parse_care_sections(sections):
    """Pick the watering, sunlight and pruning descriptions out of care-guide sections.

    Args:
        sections: Iterable of dict-like entries; each is read through
            ``.get("type")`` and ``.get("description")``.

    Returns:
        dict with the keys ``watering_guide``, ``sunlight_guide`` and
        ``pruning_guide``. A key stays ``None`` when no entry of that type
        carries a non-empty description; when a type occurs several times,
        the last entry with a description wins.
    """
    targets = (
        ("watering", "watering_guide"),
        ("sunlight", "sunlight_guide"),
        ("pruning", "pruning_guide"),
    )
    result = {column: None for _, column in targets}
    for entry in sections:
        kind = entry.get("type")
        text = entry.get("description")
        if not (kind and text):
            continue  # entries lacking a type or a description are ignored
        for label, column in targets:
            if kind == label:
                result[column] = text
                break
    return result

## Step 4 - Flatten General + Care Data

In [4]:
# Flatten general plant record with care guide
def flatten_general_care(details, care):
    """Flatten one species-details record plus its care guide into a table row.

    Args:
        details: dict loaded from a species-details JSON file.
        care: dict loaded from the matching care-guide JSON file, or None
            when no care guide is available for this species.

    Returns:
        dict keyed by the Table03_GeneralPlantCareGuideTable column names, or
        None when the record has no usable numeric ``id`` or its id is above
        3000.
    """
    plant_id = details.get("id")
    try:
        above_cutoff = float(plant_id) > 3000
    except (TypeError, ValueError, OverflowError):
        # A missing or non-numeric id cannot be range-checked or used as a key.
        return None
    if above_cutoff:
        return None  # Skip threatened plants

    # Extract care guide sections, tolerating a missing/empty "data" list
    # (indexing [0] on an empty list would raise IndexError).
    care_sections = []
    if isinstance(care, dict):
        entries = care.get("data") or []
        first_care = entries[0] if isinstance(entries, list) and entries else {}
        if isinstance(first_care, dict):
            care_sections = first_care.get("section") or []
    guide = parse_care_sections(care_sections)

    # Compose watering benchmark; "or {}" also covers an explicit null value.
    # NOTE(review): the recorded Step 11 preview shows a value stored with
    # literal double quotes ('At least once "7-10" days') — confirm whether
    # those quotes should be stripped here.
    benchmark = details.get("watering_general_benchmark") or {}
    benchmark_str = None
    if isinstance(benchmark, dict) and benchmark.get("value") and benchmark.get("unit"):
        benchmark_str = f"At least once {benchmark['value']} {benchmark['unit']}"

    # Compose pruning count. Accept either a list whose first entry is the
    # {amount, interval} dict, or that dict directly.
    pruning_count = details.get("pruning_count")
    if isinstance(pruning_count, list):
        first_entry = pruning_count[0] if pruning_count else None
    else:
        first_entry = pruning_count
    pruning_str = None
    if isinstance(first_entry, dict) and "amount" in first_entry and "interval" in first_entry:
        pruning_str = f"{first_entry['amount']} times {first_entry['interval']}"

    # Compose flowers detail
    flowers_detail = None
    if details.get("flowers") and details.get("flowering_season"):
        flowers_detail = f"Flowers in {details['flowering_season']}"

    def _as_json(key):
        # Missing or null list fields serialise as "[]" rather than "null".
        value = details.get(key)
        return json.dumps([] if value is None else value, ensure_ascii=False)

    return {
        "general_plant_id": plant_id,
        "watering": details.get("watering"),
        "watering_general_benchmark": benchmark_str,
        # Store as JSON strings to fit MySQL Workbench
        "sunlight": _as_json("sunlight"),
        "soil": _as_json("soil"),
        "drought_tolerant": details.get("drought_tolerant", False),
        "salt_tolerant": details.get("salt_tolerant", False),
        "pruning_month": _as_json("pruning_month"),
        "pruning_count": pruning_str,
        "pest_susceptibility": _as_json("pest_susceptibility"),
        "flowers_detail": flowers_detail,
        "harvest_season": details.get("harvest_season"),
        "growth_rate": details.get("growth_rate"),
        "maintenance": details.get("maintenance"),
        "care_level": details.get("care_level"),
        "watering_guide": guide["watering_guide"],
        "sunlight_guide": guide["sunlight_guide"],
        "pruning_guide": guide["pruning_guide"]
    }

## Step 5 - Collect File Lists

In [5]:
# Load general plant JSON files
# glob() returns matches in arbitrary, filesystem-dependent order. Sorting makes
# the processing order reproducible across runs and machines, including which
# file wins when two care-guide files carry the same species_id.
detail_files = sorted(glob(os.path.join(details_dir, "plant_species_details_*.json")))
care_files = sorted(glob(os.path.join(care_dir, "plant_species_care_guide_*.json")))

## Step 6 - Build Care Guide Lookup

In [6]:
# Build care guide lookup by species_id
care_lookup = {}
for care_path in care_files:
    with open(care_path, "r", encoding="utf-8") as f:
        care_data = json.load(f)
    # The species_id is read from the first entry of the "data" list. A payload
    # whose "data" is missing, empty, or not a list carries no id and is skipped
    # instead of raising IndexError/AttributeError.
    entries = care_data.get("data") if isinstance(care_data, dict) else None
    first_entry = entries[0] if isinstance(entries, list) and entries else None
    species_id = first_entry.get("species_id") if isinstance(first_entry, dict) else None
    if species_id:
        # The whole payload is stored; a later file with the same id overwrites an earlier one.
        care_lookup[species_id] = care_data

## Step 7 - Flatten and Combine Records

In [7]:
# Turn every species-details file into one flat row, pairing it with the care
# guide stored under the same id in care_lookup (None when there is no match).
flattened_data = []
for detail_path in detail_files:
    with open(detail_path, "r", encoding="utf-8") as handle:
        plant_details = json.load(handle)
    row = flatten_general_care(plant_details, care_lookup.get(plant_details.get("id")))
    if not row:
        continue  # flatten_general_care returned None: the record is excluded
    flattened_data.append(row)

## Step 8 - Build DataFrame and Column Order

In [8]:
# Column order
ordered_cols = [
    "general_plant_id", "watering", "watering_general_benchmark", "sunlight", "soil",
    "drought_tolerant", "salt_tolerant", "pruning_month", "pruning_count",
    "pest_susceptibility", "flowers_detail", "harvest_season", "growth_rate",
    "maintenance", "care_level", "watering_guide", "sunlight_guide", "pruning_guide"
]

# Create DataFrame and sort
# Passing columns= fixes the column order up front and keeps the schema intact
# even when flattened_data is empty (a column-less frame would make
# sort_values raise KeyError).
df = pd.DataFrame(flattened_data, columns=ordered_cols)
# Convert ids to nullable integers BEFORE sorting so the order is numeric even
# if some ids arrived as strings; unparseable ids become <NA> and sort last.
df["general_plant_id"] = pd.to_numeric(df["general_plant_id"], errors="coerce").astype("Int64")
df = df.sort_values(by="general_plant_id").reset_index(drop=True)

## Step 9 - Save CSV

In [9]:
# Save to CSV
output_dir = os.path.dirname(output_path)
if output_dir:  # os.makedirs("") raises when the path has no directory part
    os.makedirs(output_dir, exist_ok=True)
# The LOAD DATA statement in Step 10 declares LINES TERMINATED BY '\r\n', while
# pandas defaults to os.linesep ('\n' outside Windows). Writing CRLF explicitly
# keeps the file importable regardless of the machine that produced it.
# UTF-8 is stated explicitly to match the CHARACTER SET utf8mb4 used on import.
# (`lineterminator` requires pandas >= 1.5; older releases spell it `line_terminator`.)
df.to_csv(output_path, index=False, encoding="utf-8", lineterminator="\r\n")

## Step 10 - Import General Plant Care Guide Table into MySQL

In [10]:
from getpass import getpass

import mysql.connector
from mysql.connector import Error

# Database connection configuration
# SECURITY: the password was previously hardcoded in this cell. It is now read
# from the PLANTX_DB_PASSWORD environment variable, falling back to an
# interactive prompt. The old password has been exposed in the notebook and
# should be rotated. Host, user and database can be overridden through
# PLANTX_DB_HOST, PLANTX_DB_USER and PLANTX_DB_NAME.
db_config = {
    'host': os.environ.get('PLANTX_DB_HOST', 'database-plantx.cqz06uycysiz.us-east-1.rds.amazonaws.com'),
    'user': os.environ.get('PLANTX_DB_USER', 'zihan'),
    'password': os.environ.get('PLANTX_DB_PASSWORD') or getpass('MySQL password: '),
    'database': os.environ.get('PLANTX_DB_NAME', 'FIT5120_PlantX_Database'),
    'allow_local_infile': True,
    'use_pure': True  # Use pure Python implementation
}

# Pre-bind both names so the finally block never hits an unbound (or stale,
# from an earlier run) connection/cursor when connect() itself fails.
connection = None
cursor = None

try:
    # Establish connection
    connection = mysql.connector.connect(**db_config)

    if connection.is_connected():
        print("Successfully connected to MySQL server")

        # Create cursor
        cursor = connection.cursor()

        # Construct LOAD DATA LOCAL INFILE command
        # The file name must stay in sync with output_path from Step 1.
        # Python turns '\\r\\n' into the escapes \r\n, which MySQL reads as
        # CR LF, so the CSV has to be written with CRLF line endings.
        # NOTE(review): the recorded outputs show 891 rows imported here but
        # 1484 rows in the table in Step 11, so the table appears to hold rows
        # from elsewhere (possibly earlier runs) — confirm whether it should be
        # emptied or protected by a unique key before re-importing.
        load_data_query = """
        LOAD DATA LOCAL INFILE '02_wrangled_data/Table03_GeneralPlantCareGuideTable.csv'
        INTO TABLE Table03_GeneralPlantCareGuideTable
        CHARACTER SET utf8mb4
        FIELDS TERMINATED BY ',' 
        OPTIONALLY ENCLOSED BY '"'
        LINES TERMINATED BY '\\r\\n'
        IGNORE 1 LINES
        (   
            general_plant_id, watering, watering_general_benchmark, sunlight, soil,
            drought_tolerant, salt_tolerant, pruning_month, pruning_count,
            pest_susceptibility, flowers_detail, harvest_season, growth_rate, maintenance,
            care_level, watering_guide, sunlight_guide, pruning_guide
        );
        """

        # Execute command
        cursor.execute(load_data_query)
        connection.commit()  # Commit transaction

        print(f"Data import successful! {cursor.rowcount} rows affected.")

except Error as e:
    print(f"Error occurred during execution: {e}")

finally:
    # Close connection
    if connection is not None and connection.is_connected():
        if cursor is not None:
            cursor.close()
        connection.close()
        print("MySQL connection closed.")

Successfully connected to MySQL server
Data import successful! 891 rows affected.
MySQL connection closed.


## Step 11 - Verify Imported Rows and Preview

In [11]:
# Open a fresh connection (the import cell closes its own) and check the table.
# Pre-bind both names so the finally block never acts on an unbound or stale
# connection/cursor when connect() fails.
connection = None
cursor = None
try:
    connection = mysql.connector.connect(**db_config)
    cursor = connection.cursor()

    cursor.execute("SELECT COUNT(*) FROM Table03_GeneralPlantCareGuideTable")
    row_count = cursor.fetchone()[0]
    print(f"The table currently contains {row_count} rows")

    # Preview first few rows
    cursor.execute("SELECT * FROM Table03_GeneralPlantCareGuideTable LIMIT 5")
    rows = cursor.fetchall()
    for row in rows:
        print(row)

except Error as e:
    print(f"Error occurred during query: {e}")
finally:
    if connection is not None and connection.is_connected():
        if cursor is not None:
            cursor.close()
        connection.close()

The table currently contains 1484 rows
(1, 'Frequent', 'At least once "7-10" days', '["full sun"]', '[]', 'False', 'False', '["February", "March", "April"]', '', '[]', '', '', 'High', '', 'Medium', "European Silver Fir 'Abies Alba' should be watered about once a week, depending on the season and weather conditions. During the growing season (spring to mid-summer), water deeply and thoroughly. For the rest of the year, water only when the soil is dry. When watering, ensure that the root system is saturated. Do not over-water, as this can lead to root rot.", "European Silver Fir 'Abies Alba' is a species of coniferous tree that grows best in sunny areas. This species of plant requires a minimum of 6 hours of sunlight per day throughout the growing season. Ideally, it should receive up to 8 hours of direct sunlight in the summer months, and 4-5 hours of sunlight in the winter. It is important to note that European Silver Fir 'Abies Alba' is not tolerant of shade and may be damaged if expo