In [1]:
# Imports
import sqlite3
import pandas as pd
import os

In [2]:
# Build master_daylily as a full copy of the daylilies table

# Open the SQLite database file; the following cells keep using this `conn`
conn = sqlite3.connect("daylilies.db")
cur = conn.cursor()

# Snapshot query: every column and row of daylilies goes into the new table
create_master_sql = """
    CREATE TABLE master_daylily AS
    SELECT *
    FROM daylilies;
"""

# Run in order: discard any copy left by an earlier run (this is what makes
# the cell re-runnable), then take the fresh snapshot.
clone_statements = (
    "DROP TABLE IF EXISTS master_daylily;",
    create_master_sql,
)
for statement in clone_statements:
    cur.execute(statement)

conn.commit()
print("master_daylily table created.")


master_daylily table created.


In [3]:
# Add hybridizer location columns to master_daylily.
#
# Each master_daylily row is LEFT JOINed to the location_data row whose
# "ABBREVIATION CODE" equals the daylily's hybridizer value, so rows without
# a match are kept and receive NULL in the appended columns.
# location_data.Name is stored under the name primary_hybridizer.
#
# NOTE(review): if "ABBREVIATION CODE" is not unique in location_data, the
# join emits one output row per matching location row -- confirm uniqueness.
# NOTE(review): this cell replaces master_daylily with the joined table, so
# running it a second time on its own joins onto a table that already has
# these columns; rerun from the clone cell instead.
cur = conn.cursor()

regional_join_sql = """
    CREATE TABLE master_daylily_regional AS
    SELECT  d.*,
            l.Region,
            l.Name        AS primary_hybridizer,
            l.City,
            l.State,
            l.Country
    FROM master_daylily AS d
    LEFT JOIN location_data AS l
           ON d.hybridizer = l."ABBREVIATION CODE";
"""

# Order matters: clear the staging table, build the joined result into it,
# then drop the old master table and rename the staging table into its place.
regional_statements = (
    "DROP TABLE IF EXISTS master_daylily_regional;",
    regional_join_sql,
    "DROP TABLE master_daylily;",
    "ALTER TABLE master_daylily_regional RENAME TO master_daylily;",
)
for statement in regional_statements:
    cur.execute(statement)

conn.commit()
print("Regional data merged into master_daylily.")


Regional data merged into master_daylily.


In [4]:
# Add network-influence metrics to master_daylily.
#
# Each master_daylily row is LEFT JOINed to network_influence_metrics on
# master_daylily.name = network_influence_metrics.Variety; varieties with no
# metrics row are kept and receive NULL in the eleven appended columns.
#
# NOTE(review): a Variety value that appears more than once in
# network_influence_metrics would duplicate the matching daylily rows --
# confirm uniqueness.
# NOTE(review): this cell replaces master_daylily with the joined table, so
# running it a second time on its own joins onto a table that already has
# these columns; rerun from the clone cell instead.
cur = conn.cursor()

network_join_sql = """
    CREATE TABLE master_daylily_final AS
    SELECT  m.*,
            n.Direct_Children,
            n.Total_Descendants,
            n.Avg_Generation_Impact,
            n.Descendant_Success_Rate,
            n.Breeding_Span,
            n.Yearly_Impact,
            n.In_Degree_Centrality,
            n.Out_Degree_Centrality,
            n.PageRank,
            n.Katz_Centrality,
            n.Betweenness_Centrality
    FROM master_daylily AS m
    LEFT JOIN network_influence_metrics AS n
           ON m.name = n.Variety;
"""

# Order matters: clear the staging table, build the joined result into it,
# then drop the old master table and rename the staging table into its place.
network_statements = (
    "DROP TABLE IF EXISTS master_daylily_final;",
    network_join_sql,
    "DROP TABLE master_daylily;",
    "ALTER TABLE master_daylily_final RENAME TO master_daylily;",
)
for statement in network_statements:
    cur.execute(statement)

conn.commit()
print("Network statistics merged into master_daylily.")

Network statistics merged into master_daylily.


In [5]:
# Aggregate parent_child_relationships into one row per variety, in two
# helper tables. Edges are read as source = parent, target = child, and rows
# with a NULL on either side are ignored.
cur = conn.cursor()

# parent_lists: grouped by child (target); `parents` holds the distinct
# sources for that child concatenated by group_concat.
parent_lists_sql = """
    CREATE TABLE parent_lists AS
    SELECT  target     AS variety,
            group_concat(DISTINCT source) AS parents
    FROM     parent_child_relationships
    WHERE    source IS NOT NULL
      AND    target IS NOT NULL
    GROUP BY target;
"""

# child_lists: grouped by parent (source); `children` holds the distinct
# targets for that parent concatenated by group_concat.
child_lists_sql = """
    CREATE TABLE child_lists AS
    SELECT  source     AS variety,
            group_concat(DISTINCT target) AS children
    FROM     parent_child_relationships
    WHERE    source IS NOT NULL
      AND    target IS NOT NULL
    GROUP BY source;
"""

# Each helper table is dropped first (if present) and then rebuilt, so the
# cell can be executed repeatedly.
rebuild_steps = (
    ("DROP TABLE IF EXISTS parent_lists;", parent_lists_sql),
    ("DROP TABLE IF EXISTS child_lists;", child_lists_sql),
)
for drop_sql, create_sql in rebuild_steps:
    cur.execute(drop_sql)
    cur.execute(create_sql)

conn.commit()
print(" Aggregated parent_lists + child_lists tables created.")


 Aggregated parent_lists + child_lists tables created.


In [6]:
# Attach the aggregated parent and child lists to master_daylily.
#
# master_daylily.name is LEFT JOINed to parent_lists.variety and, separately,
# to child_lists.variety; a variety missing from either helper table is kept
# and receives NULL in the corresponding column.
#
# NOTE(review): this cell replaces master_daylily with the joined table, so
# running it a second time on its own joins onto a table that already has
# the parents/children columns; rerun from the clone cell instead.
cur = conn.cursor()

relationship_join_sql = """
    CREATE TABLE master_daylily_rel AS
    SELECT  m.*,
            p.parents,
            c.children
    FROM     master_daylily        AS m
    LEFT JOIN parent_lists         AS p  ON m.name = p.variety
    LEFT JOIN child_lists          AS c  ON m.name = c.variety;
"""

# Order matters: clear the staging table, build the joined result into it,
# then drop the old master table and rename the staging table into its place.
relationship_statements = (
    "DROP TABLE IF EXISTS master_daylily_rel;",
    relationship_join_sql,
    "DROP TABLE master_daylily;",
    "ALTER TABLE master_daylily_rel RENAME TO master_daylily;",
)
for statement in relationship_statements:
    cur.execute(statement)

conn.commit()
print(" Parent & child lists merged into master_daylily (final).")

 Parent & child lists merged into master_daylily (final).


In [7]:
# Close the connection opened in the clone cell. Each build/merge cell above
# ends with conn.commit(), so nothing is left pending at this point.
conn.close()
print("Database connection closed.")

Database connection closed.


In [8]:
# Export master_daylily to CSV

# Destination file; its parent directory is created below if it is missing
csv_path = "data/master_daylily.csv"

# The build connection was closed in the previous cell, so open a new one
# just for this read and always release it -- even if the query raises.
# (Using the connection as a `with` block would only manage the transaction;
# it does not close the handle, hence the explicit try/finally.)
conn = sqlite3.connect("daylilies.db")
try:
    # Read in the full master table
    df_master = pd.read_sql_query("SELECT * FROM master_daylily;", conn)
finally:
    conn.close()

# Ensure the output directory exists (derived from csv_path so the two
# cannot drift apart)
os.makedirs(os.path.dirname(csv_path), exist_ok=True)

# Save to CSV without the pandas index column
df_master.to_csv(csv_path, index=False)

print(f"Exported {len(df_master):,} rows to {csv_path}")

Exported 101,446 rows to data/master_daylily.csv
