In [21]:
import pandas as pd
import numpy as np
import sqlite3

# Create DB File and Connect

In [22]:
# Connect to the database. If it does not exist, it will be created
conn = sqlite3.connect('test3.db')
# Create a cursor object. This is how we interact with the database
curs = conn.cursor()


### IMPORTANT!!! ###
# By default, sqlite does not enforce foreign key constraints. 
# According to the documentation, this is for backwards compatibility. You have to turn them on yourself.
curs.execute('PRAGMA foreign_keys=ON;')


<sqlite3.Cursor at 0x198d6f1aac0>

## Define and Create TSample

In [4]:
# Define the tables

# If running this more than once, you'll need to drop the table before you can redefine it

curs.execute("DROP TABLE IF EXISTS tSample;")

sql = """
CREATE TABLE tSample (
    sample_id TEXT PRIMARY KEY,
    site_id TEXT,
    collect_date DATETIME
)
;"""
curs.execute(sql)

<sqlite3.Cursor at 0x1ec42ba0b20>

In [5]:
tsampledf = pd.read_csv('./csv_folder/sample.csv')

In [6]:
# INSERT INTO (column names) VALUES (values to insert)
# The : indicates parameter names.
# They do not need to be the same as the columns we are inserting into, but it helps keep the code clean.
# They are matched up in order. For example, if instead of :lat I called it :x, then :x would be inserted into
# the lat column, since lat is the second column in the list of column names.
sql = """
INSERT INTO tSample (sample_id, site_id, collect_date) VALUES (:sample_id, :site_id, :date)
;"""

# I'm going to loop over the dataframe such that each row will be returned as a dictionary.
# The keys for the dictionary will be the column names, and the values will be the values in the dataframe.
# I've ensured that the names in my dataframe match exactly the parameter names above - the way this works
# is that SQL will look for that key in the dictionary.  For example, since I have a parameter :loc_id, 
# it will look for a key in the dictionary called loc_id.





for row in tsampledf.to_dict(orient='records'):
    # Uncomment this to see the dictionaries
    #print(row)
    
    # The second input for curs.execute() are parameters
    curs.execute(sql, row)

In [7]:
sql = """
CREATE VIEW tSample AS 

SELECT *
;"""

pd.read_sql("SELECT * FROM tSample;", conn)

Unnamed: 0,sample_id,site_id,collect_date
0,BLDTT0,BLD,6/29/2020
1,BLDTT1,BLD,6/29/2020
2,BLDTT2,BLD,6/29/2020
3,BLDTT3,BLD,6/29/2020
4,BLDTT4,BLD,6/29/2020
5,BLDTT5,BLD,6/29/2020
6,BLDTT6,BLD,6/29/2020
7,BLDTT7,BLD,6/29/2020
8,BLDTT8,BLD,6/29/2020
9,BLDTT9,BLD,6/29/2020


## Define and Create TMorphology

In [8]:
tmorphdf = pd.read_csv('./csv_folder/plant_morphology.csv')
tmorphdf.shape

(493, 12)

In [9]:
tmorphdf = tmorphdf.rename(columns={'Ht(cm)': 'plant_ht', '# Leaves': 'num_leaves', 'Length LL (cm)': 'longest_leaf_cm',
                                     'Width LL (cm)': 'widest_leaf_cm', 'Stem Diameter (mm)': 'stem_diam_mm',
                                     '# Inflorescences': 'num_inflores', '# Flowers Per (x2)': 'num_flowers',
                                     'Herbivory': 'herbivory', 'Species': 'species'})
tmorphdf = tmorphdf.drop('date', axis= 1)
tmorphdf = tmorphdf.drop('site_id',  axis = 1)
tmorphdf.head()

Unnamed: 0,sample_id,species,plant_ht,num_leaves,longest_leaf_cm,widest_leaf_cm,stem_diam_mm,num_inflores,num_flowers,herbivory
0,BLDTT0,,93.3,22.0,16.5,7.3,10.5,5.0,"46(1), 53(1)","1=5, 2=1"
1,BLDTT0,,,,,,,,,
2,BLDTT1,,70.7,14.0,15.4,7.1,8.9,5.0,"20(2), 25(5)","1=2, 2=3"
3,BLDTT1,,,,,,,,,
4,BLDTT2,,83.4,18.0,15.6,7.4,11.9,7.0,"24(3), 19(4)","1=3, 2=1"


In [10]:
tmorphdf = tmorphdf.drop_duplicates(subset=['sample_id'])
tmorphdf.shape

(440, 10)

In [11]:
# Define the tables

# If running this more than once, you'll need to drop the table before you can redefine it

curs.execute("DROP TABLE IF EXISTS tMorphology;")

sql = """
CREATE TABLE tMorphology (
    sample_id TEXT PRIMARY KEY,
    species TEXT,
    plant_ht REAL,
    num_leaves INTEGER,
    longest_leaf_cm REAL,
    widest_leaf_cm REAL,
    stem_diam_mm REAL,
    num_inflores INTEGER,
    num_flowers INTEGER,
    herbivory TEXT
)
;"""
curs.execute(sql)

# INSERT INTO (column names) VALUES (values to insert)
# The : indicates parameter names.
# They do not need to be the same as the columns we are inserting into, but it helps keep the code clean.
# They are matched up in order. For example, if instead of :lat I called it :x, then :x would be inserted into
# the lat column, since lat is the second column in the list of column names.
sql = """
INSERT INTO tMorphology (sample_id, species, plant_ht, num_leaves,
    longest_leaf_cm, widest_leaf_cm, stem_diam_mm, num_inflores,
    num_flowers, herbivory) VALUES (:sample_id, :species, :plant_ht, :num_leaves,
    :longest_leaf_cm, :widest_leaf_cm, :stem_diam_mm, :num_inflores,
    :num_flowers, :herbivory)
;"""

# I'm going to loop over the dataframe such that each row will be returned as a dictionary.
# The keys for the dictionary will be the column names, and the values will be the values in the dataframe.
# I've ensured that the names in my dataframe match exactly the parameter names above - the way this works

# is that SQL will look for that key in the dictionary.  For example, since I have a parameter :loc_id, 
# it will look for a key in the dictionary called loc_id.





for row in tmorphdf.to_dict(orient='records'):
    # Uncomment this to see the dictionaries
    #print(row)
    
    # The second input for curs.execute() are parameters
    curs.execute(sql, row)

## Create TSoilNutrients

In [12]:
curs.execute("DROP TABLE IF EXISTS tSoilNutrients;")
#Organic Matter % Rate	Estimated Nitrogen Release (lbs/A)	Cation Exchange Capacity (meq/100g)	
#pH	Buffer pH	Phosphorus ppm	Potassium ppm	Calcium ppm	Magnesium ppm	
#Sulfur ppm	Sodium ppm	Zinc ppm	Manganese ppm	Iron ppm	Copper ppm	Boron ppm

sql = """
CREATE TABLE tSoilNutrients (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    OM REAL,
    ENR REAL,
    CEC REAL,
    pH REAL,
    BpH REAL,
    Phosphorus REAL,
    Potassium REAL,
    Calcium REAL,
    Magnesium REAL,
    Sulfur REAL,
    Sodium REAL,
    Zinc REAL,
    Manganese REAL,
    Iron REAL,
    Copper REAL,
    Boron REAL,
    PRIMARY KEY (sample_id)
)
;"""

curs.execute(sql)

<sqlite3.Cursor at 0x1ec42ba0b20>

In [13]:
tsoilnutrientdf = pd.read_csv('./csv_folder/soil_nutrients.csv')
tsoilnutrientdf.shape

(423, 18)

In [14]:
tsoilnutrientdf = tsoilnutrientdf.drop('site', axis = 1)

In [15]:
tsoilnutrientdf = tsoilnutrientdf.drop_duplicates('sample_id')

In [16]:
tsoilnutrientdf.tail()

Unnamed: 0,sample_id,OM,ENR,CEC,pH,BpH,Phosphorus,Potassium,Calcium,Magnesium,Sulfur,Sodium,Zinc,Manganese,Iron,Copper,Boron
418,PNR13,5.8,142,20.4,7.7,6.93,21,78,3777,155,12,13,3.7,98,92,1.1,1.1
419,PNR14,4.2,110,22.3,7.6,6.93,9,45,4217,131,9,13,2.1,36,156,1.2,0.5
420,PNR15,3.6,98,18.7,7.7,6.93,8,42,3472,143,13,13,2.5,51,135,1.2,0.7
421,PNR8,3.7,100,17.9,7.6,6.93,12,37,3335,124,9,14,1.5,35,59,0.7,0.5
422,PNR9,3.7,100,18.7,7.5,6.93,18,48,3457,147,7,13,1.6,39,62,0.8,0.7


In [17]:
soil_list = tsoilnutrientdf['sample_id'].tolist()
temp_list = []
for i in soil_list:
    if i[0] == '8':
        temp_list.append(i)

for i in temp_list:
    tsoilnutrientdf.drop(tsoilnutrientdf[(tsoilnutrientdf['sample_id'] == i)].index, inplace=True)
    tsoilnutrientdf.drop(tsoilnutrientdf[(tsoilnutrientdf['sample_id'] == 'BLDRV1-')].index, inplace=True)
    tsoilnutrientdf.drop_duplicates(subset="sample_id",
                             keep='first', inplace=True)
soil_list = tsoilnutrientdf['sample_id'].tolist()
for i in range(len(soil_list)):
    if soil_list[i][0] == '1':
        soil_list[i] = soil_list[i][1:]
    if soil_list[i][0] == ' ':
        soil_list[i] = soil_list[i][1:]
print(len(soil_list))

368


In [18]:
tsoilnutrientdf['sample_id'] = soil_list
tsoilnutrientdf.tail()

Unnamed: 0,sample_id,OM,ENR,CEC,pH,BpH,Phosphorus,Potassium,Calcium,Magnesium,Sulfur,Sodium,Zinc,Manganese,Iron,Copper,Boron
418,PNR13,5.8,142,20.4,7.7,6.93,21,78,3777,155,12,13,3.7,98,92,1.1,1.1
419,PNR14,4.2,110,22.3,7.6,6.93,9,45,4217,131,9,13,2.1,36,156,1.2,0.5
420,PNR15,3.6,98,18.7,7.7,6.93,8,42,3472,143,13,13,2.5,51,135,1.2,0.7
421,PNR8,3.7,100,17.9,7.6,6.93,12,37,3335,124,9,14,1.5,35,59,0.7,0.5
422,PNR9,3.7,100,18.7,7.5,6.93,18,48,3457,147,7,13,1.6,39,62,0.8,0.7


In [19]:
# INSERT INTO (column names) VALUES (values to insert)
# The : indicates parameter names.
# They do not need to be the same as the columns we are inserting into, but it helps keep the code clean.
# They are matched up in order. For example, if instead of :lat I called it :x, then :x would be inserted into
# the lat column, since lat is the second column in the list of column names.
sql = """
INSERT INTO tSoilNutrients (sample_id, OM, ENR, CEC, pH, BpH,
    Phosphorus, Potassium, Calcium, Magnesium, Sulfur, Sodium,
    Zinc, Manganese, Iron, Copper, Boron) VALUES (:sample_id, :OM, :ENR, :CEC, :pH, :BpH,
    :Phosphorus, :Potassium, :Calcium, :Magnesium, :Sulfur, :Sodium,
    :Zinc, :Manganese, :Iron, :Copper, :Boron)
;"""


# I'm going to loop over the dataframe such that each row will be returned as a dictionary.
# The keys for the dictionary will be the column names, and the values will be the values in the dataframe.
# I've ensured that the names in my dataframe match exactly the parameter names above - the way this works
# is that SQL will look for that key in the dictionary.  For example, since I have a parameter :loc_id, 
# it will look for a key in the dictionary called loc_id.





for row in tsoilnutrientdf.to_dict(orient='records'):
    # Uncomment this to see the dictionaries
    
    # The second input for curs.execute() are parameters
    curs.execute(sql, row)

In [20]:
sql = """
CREATE VIEW tSoilNutrients AS 

SELECT *
;"""

pd.read_sql("SELECT * FROM tSoilNutrients;", conn)

Unnamed: 0,sample_id,OM,ENR,CEC,pH,BpH,Phosphorus,Potassium,Calcium,Magnesium,Sulfur,Sodium,Zinc,Manganese,Iron,Copper,Boron
0,BLDTT0,4.3,124.0,8.1,6.8,6.91,18.0,180.0,1193.0,178.0,3.0,7.0,2.3,321.0,78.0,1.5,0.5
1,BLDTT11,4.7,135.0,6.6,6.0,6.83,22.0,195.0,781.0,138.0,5.0,9.0,1.9,85.0,76.0,1.0,0.4
2,BLDTT12,4.6,133.0,6.6,6.1,6.84,23.0,266.0,770.0,131.0,5.0,9.0,1.6,78.0,82.0,0.9,0.3
3,BLDTT13,4.9,138.0,6.8,6.1,6.84,24.0,270.0,741.0,175.0,1.0,9.0,2.2,76.0,80.0,1.1,0.3
4,BLDTT14,5.4,148.0,7.3,5.8,6.79,32.0,229.0,766.0,172.0,8.0,9.0,2.6,64.0,87.0,1.0,0.3
5,BLDTT15,4.5,130.0,7.1,6.1,6.83,25.0,335.0,751.0,177.0,8.0,9.0,2.0,76.0,81.0,1.2,0.4
6,BLDTT16,5.0,140.0,7.1,6.0,6.82,14.0,152.0,859.0,151.0,10.0,9.0,1.7,163.0,79.0,1.1,0.4
7,BLDTT17,5.0,139.0,7.6,6.3,6.85,15.0,233.0,931.0,180.0,6.0,7.0,2.0,129.0,70.0,1.3,0.4
8,BLDTT18,4.7,134.0,7.0,5.7,6.78,19.0,112.0,790.0,144.0,5.0,8.0,2.0,94.0,95.0,1.5,0.4
9,BLDTT19,4.8,134.0,8.6,6.4,6.85,18.0,325.0,1042.0,201.0,3.0,9.0,2.3,102.0,87.0,1.4,0.5


## Create and fill table TPlantNutrients

In [21]:
curs.execute("DROP TABLE IF EXISTS tPlantNutrients;")


sql = """
CREATE TABLE tPlantNutrients (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    N_percent_dm REAL,
    P_percent_dm REAL,
    K_percent_dm REAL,
    Ca_percent_dm REAL,
    Mg_percent_dm REAL,
    Na_percent_dm REAL,
    S_percent_dm REAL,
    Zn_ppm_dm REAL,
    Mn_ppm_dm REAL,
    Fe_ppm_dm REAL,
    Cu_ppm_dm REAL,
    B_ppm_dm REAL,
    Al_ppm_dm REAL,
    PRIMARY KEY (sample_id)
)
;"""
curs.execute(sql)

<sqlite3.Cursor at 0x1ec42ba0b20>

In [22]:
tleafnutrientdf = pd.read_csv('./csv_folder/leaf_nutrients.csv')

In [23]:
tleafnutrientdf.shape

(223, 14)

In [24]:

sql = """
INSERT INTO tPlantNutrients (sample_id, N_percent_dm, P_percent_dm, K_percent_dm, Ca_percent_dm, 
Mg_percent_dm, Na_percent_dm, S_percent_dm, Zn_ppm_dm, Mn_ppm_dm, Fe_ppm_dm, Cu_ppm_dm, B_ppm_dm, Al_ppm_dm)
VALUES (:Sample_ID, :N_percent_dm, :P_percent_dm, :K_percent_dm, :Ca_percent_dm, 
:Mg_percent_dm, :Na_percent_dm, :S_percent_dm, :Zn_ppm_dm, :Mn_ppm_dm, :Fe_ppm_dm, :Cu_ppm_dm, :B_ppm_dm, :Al_ppm_dm)
;"""

# I'm going to loop over the dataframe such that each row will be returned as a dictionary.
# The keys for the dictionary will be the column names, and the values will be the values in the dataframe.
# I've ensured that the names in my dataframe match exactly the parameter names above - the way this works
# is that SQL will look for that key in the dictionary.  For example, since I have a parameter :loc_id, 
# it will look for a key in the dictionary called loc_id.





for row in tleafnutrientdf.to_dict(orient='records'):
    # Uncomment this to see the dictionaries
    #print(row)
    
    # The second input for curs.execute() are parameters
    curs.execute(sql, row)

In [25]:
sql = """
CREATE VIEW tPlantNutrients AS 

SELECT *
;"""

pd.read_sql("SELECT * FROM tPlantNutrients;", conn)

Unnamed: 0,sample_id,N_percent_dm,P_percent_dm,K_percent_dm,Ca_percent_dm,Mg_percent_dm,Na_percent_dm,S_percent_dm,Zn_ppm_dm,Mn_ppm_dm,Fe_ppm_dm,Cu_ppm_dm,B_ppm_dm,Al_ppm_dm
0,RRL1,,0.21,4.01,0.83,0.29,0.01,0.37,29.0,281.0,234.0,8.0,24.0,130.0
1,RRL2,2.24,0.16,5.34,1.05,0.32,0.01,0.29,20.0,423.0,76.0,5.0,27.0,36.0
2,RRL3,,0.20,4.75,0.63,0.27,0.04,0.22,21.0,469.0,68.0,5.0,32.0,18.0
3,RRL4,,0.18,4.74,1.25,0.32,0.01,0.28,18.0,680.0,78.0,6.0,36.0,27.0
4,RRL5,3.00,0.26,3.51,1.20,0.44,0.01,0.29,29.0,507.0,66.0,7.0,23.0,22.0
5,RRL6,3.26,0.19,2.55,0.73,0.50,0.01,0.28,21.0,324.0,69.0,6.0,21.0,26.0
6,RRL7,,0.21,3.67,0.88,0.22,0.01,0.32,17.0,509.0,65.0,8.0,25.0,18.0
7,RRL8,,0.19,3.14,0.70,0.25,0.01,0.39,15.0,276.0,91.0,8.0,22.0,58.0
8,RRL9,3.39,0.18,3.23,0.65,0.30,0.01,0.35,17.0,476.0,72.0,8.0,30.0,23.0
9,RRL10,,0.18,3.29,1.37,0.38,0.01,0.28,28.0,538.0,61.0,6.0,25.0,26.0


## Create and Fill Soil Micro Table

In [46]:
curs.execute("DROP TABLE IF EXISTS tSoilMicrobeGenera;")

sql = """
CREATE TABLE tSoilMicrobeGenera (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    genera TEXT,
    relative_abundance REAL,
    PRIMARY KEY (sample_id, genera)
)
;"""
curs.execute(sql)

<sqlite3.Cursor at 0x1ec42ba0b20>

In [47]:
tsoilmicrobe = pd.read_csv('./csv_folder/final_soil_microbe.csv')

In [49]:
sql = """
INSERT INTO tSoilMicrobeGenera (sample_id, genera, relative_abundance)
VALUES (:sample_id, :genera, :relative_abundance)
;"""

# I'm going to loop over the dataframe such that each row will be returned as a dictionary.
# The keys for the dictionary will be the column names, and the values will be the values in the dataframe.
# I've ensured that the names in my dataframe match exactly the parameter names above - the way this works
# is that SQL will look for that key in the dictionary.  For example, since I have a parameter :loc_id, 
# it will look for a key in the dictionary called loc_id.





for row in tsoilmicrobe.to_dict(orient='records'):
    # Uncomment this to see the dictionaries
    #print(row)
    
    # The second input for curs.execute() are parameters
    curs.execute(sql, row)

In [51]:
sql = """
CREATE VIEW tSoilMicrobeGenera AS 

SELECT *
;"""

testdf = pd.read_sql("SELECT * FROM tSoilMicrobeGenera;", conn)

## Create and Fill Plant Micro Table

In [25]:
plant_micro = pd.read_csv('./csv_folder/final_plant_microbe.csv')

In [26]:
plant_micro.head()

Unnamed: 0.1,Unnamed: 0,sample_id,genera,relative_abundance
0,0,RRL1,Archaea|Euryarchaeota|Halobacteria|Halobacteri...,0.0
1,1,RRL2,Archaea|Euryarchaeota|Halobacteria|Halobacteri...,0.01333
2,2,RRL3,Archaea|Euryarchaeota|Halobacteria|Halobacteri...,0.0
3,3,RRL4,Archaea|Euryarchaeota|Halobacteria|Halobacteri...,0.00943
4,4,RRL5,Archaea|Euryarchaeota|Halobacteria|Halobacteri...,0.00581


In [27]:
curs.execute("DROP TABLE IF EXISTS tPlantMicrobeGenera;")

sql = """
CREATE TABLE tPlantMicrobeGenera (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    genera TEXT,
    relative_abundance REAL,
    PRIMARY KEY (sample_id, genera)
)
;"""
curs.execute(sql)

<sqlite3.Cursor at 0x198d6f1aac0>

In [28]:
sql = """
INSERT INTO tPlantMicrobeGenera (sample_id, genera, relative_abundance)
VALUES (:sample_id, :genera, :relative_abundance)
;"""

# I'm going to loop over the dataframe such that each row will be returned as a dictionary.
# The keys for the dictionary will be the column names, and the values will be the values in the dataframe.
# I've ensured that the names in my dataframe match exactly the parameter names above - the way this works
# is that SQL will look for that key in the dictionary.  For example, since I have a parameter :loc_id, 
# it will look for a key in the dictionary called loc_id.





for row in plant_micro.to_dict(orient='records'):
    # Uncomment this to see the dictionaries
    #print(row)
    
    # The second input for curs.execute() are parameters
    curs.execute(sql, row)

conn.commit()
conn.close()


In [23]:
sql = """
CREATE VIEW tPlantMicrobeGenera AS 

SELECT *
;"""

test_df = pd.read_sql("SELECT * FROM tPlantMicrobeGenera;", conn)

In [24]:
test_df.shape

(0, 3)

In [20]:
conn.close()

## Create and Fill Plant DNA Table

In [12]:
plant_dna = pd.read_csv('./csv_folder/final_plant_dna.csv')

In [13]:
plant_dna.head()

Unnamed: 0.1,Unnamed: 0,V1,V2,Name,Species,PC1,PC2,PC3,PC4,PC5
0,0,0.753244,0.246756,RRL1,exaltata,0.194769,0.048276,-0.02388,-0.017138,-0.039842
1,1,0.581366,0.418634,RRL9,exaltata,0.097719,0.094733,-0.00656,0.012822,-0.022248
2,2,0.591513,0.408487,RRL17,exaltata,0.099527,0.091996,0.004098,-0.021347,0.00719
3,3,0.048066,0.951934,FRW3,syriaca,-0.139465,-0.064022,-0.009963,-0.047536,0.024757
4,4,0.003946,0.996054,FRW12,syriaca,-0.155135,0.055455,0.009907,-0.043334,-0.009337


In [14]:
curs.execute("DROP TABLE IF EXISTS tPlantDNA;")

sql = """
CREATE TABLE tPlantDNA (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    V1 REAL,
    V2 REAL,
    species TEXT,
    PC1 REAL,
    PC2 REAL,
    PC3 REAL,
    PC4 REAL,
    PC5 REAL,
    PRIMARY KEY (sample_id)
)
;"""
curs.execute(sql)

<sqlite3.Cursor at 0x1d6ef441b90>

In [15]:
sql = """
INSERT INTO tPlantDNA (
    sample_id,
    V1,
    V2,
    species,
    PC1,
    PC2,
    PC3,
    PC4,
    PC5
)
VALUES (
    :Name,
    :V1,
    :V2,
    :Species,
    :PC1,
    :PC2,
    :PC3,
    :PC4,
    :PC5
)
;"""

# I'm going to loop over the dataframe such that each row will be returned as a dictionary.
# The keys for the dictionary will be the column names, and the values will be the values in the dataframe.
# I've ensured that the names in my dataframe match exactly the parameter names above - the way this works
# is that SQL will look for that key in the dictionary.  For example, since I have a parameter :loc_id, 
# it will look for a key in the dictionary called loc_id.





for row in plant_dna.to_dict(orient='records'):
    # Uncomment this to see the dictionaries
    print(row)
    
    # The second input for curs.execute() are parameters
    curs.execute(sql, row)

{'PC1': 0.19476886, 'PC2': 0.048276406, 'PC4': -0.017137507, 'Unnamed: 0': 0, 'Name': 'RRL1', 'PC3': -0.023879902, 'Species': 'exaltata', 'V1': 0.753243943, 'PC5': -0.039841764, 'V2': 0.246756057}
{'PC1': 0.09771936, 'PC2': 0.094733246, 'PC4': 0.012822087, 'Unnamed: 0': 1, 'Name': 'RRL9', 'PC3': -0.006560023000000001, 'Species': 'exaltata', 'V1': 0.581365683, 'PC5': -0.022248093, 'V2': 0.418634317}
{'PC1': 0.099527225, 'PC2': 0.09199582, 'PC4': -0.021346599, 'Unnamed: 0': 2, 'Name': 'RRL17', 'PC3': 0.004098483, 'Species': 'exaltata', 'V1': 0.591513172, 'PC5': 0.007190188, 'V2': 0.408486828}
{'PC1': -0.13946539, 'PC2': -0.064021535, 'PC4': -0.047536157, 'Unnamed: 0': 3, 'Name': 'FRW3', 'PC3': -0.009963222, 'Species': 'syriaca', 'V1': 0.04806566, 'PC5': 0.024756748, 'V2': 0.95193434}
{'PC1': -0.15513538, 'PC2': 0.05545539, 'PC4': -0.04333402, 'Unnamed: 0': 4, 'Name': 'FRW12', 'PC3': 0.009907202, 'Species': 'syriaca', 'V1': 0.003946191, 'PC5': -0.009336631, 'V2': 0.996053809}
{'PC1': -0.1

In [16]:
conn.commit()

In [17]:
sql = """
CREATE VIEW tPlantDNA AS 

SELECT *
;"""

test_df = pd.read_sql("SELECT * FROM tPlantDNA;", conn)

In [18]:
test_df.head()

Unnamed: 0,sample_id,V1,V2,species,PC1,PC2,PC3,PC4,PC5
0,RRL1,0.753244,0.246756,exaltata,0.194769,0.048276,-0.02388,-0.017138,-0.039842
1,RRL9,0.581366,0.418634,exaltata,0.097719,0.094733,-0.00656,0.012822,-0.022248
2,RRL17,0.591513,0.408487,exaltata,0.099527,0.091996,0.004098,-0.021347,0.00719
3,FRW3,0.048066,0.951934,syriaca,-0.139465,-0.064022,-0.009963,-0.047536,0.024757
4,FRW12,0.003946,0.996054,syriaca,-0.155135,0.055455,0.009907,-0.043334,-0.009337


In [19]:
conn.close()