In [1]:
import pandas as pd
import numpy as np
import sqlite3

In [2]:
# Toggle for the sanity-check cells: when True, the guarded cells below
# print the contents of a table right after it has been loaded.
checking_views = True

# Create DB File and Connect

In [3]:
# Connect to the database. If the file does not exist, it will be created.
conn = sqlite3.connect('new_db_3.db')
# Create a cursor object. Every later cell issues its SQL through `curs`.
curs = conn.cursor()


### IMPORTANT!!! ###
# By default, sqlite does not enforce foreign key constraints.
# According to the documentation, this is for backwards compatibility.
# You have to turn them on yourself; the PRAGMA below does that for this connection.
curs.execute('PRAGMA foreign_keys=ON;')


<sqlite3.Cursor at 0x18ea4336740>

## Create Sample Table

In [4]:
# (Re)create tSample, the parent table that the nutrient, microbe and DNA
# tables point at through `REFERENCES tSample(sample_id)`.
#
# With foreign keys enforced, DROP TABLE first performs an implicit DELETE of
# every row.  On a re-run against a database whose child tables still hold
# rows, that DELETE violates their foreign keys and the DROP fails.  Enforcement
# is therefore switched off just for the drop and restored right after; the
# child tables are dropped and rebuilt by their own cells further down.
# (PRAGMA foreign_keys is a no-op while a transaction is open, so this relies
# on the cell running before any uncommitted INSERT, as it does on Run All.)
curs.execute("PRAGMA foreign_keys=OFF;")
curs.execute("DROP TABLE IF EXISTS tSample;")
curs.execute("PRAGMA foreign_keys=ON;")

sql = """
CREATE TABLE tSample (
    sample_id TEXT PRIMARY KEY,
    site_id TEXT,
    BGR TEXT,
    collect_date DATETIME
)
;"""
curs.execute(sql)

<sqlite3.Cursor at 0x18ea4336740>

In [5]:
# Load the per-sample metadata that will populate tSample.
tsampledf = pd.read_csv('sql_data_files/sample.csv')

In [6]:
# Discard the stray 'Unnamed: 0' column that comes in with the CSV.
# errors='ignore' keeps this cell re-runnable: a second execution finds the
# column already gone and simply leaves the frame unchanged instead of raising.
tsampledf = tsampledf.drop(columns='Unnamed: 0', errors='ignore')

In [7]:
# List the distinct site codes; these are the keys the BGR mapping below must cover.
tsampledf['site_id'].unique()

array(['BLD', 'SKY', 'KCK', 'WART', 'SBR', 'BTU', 'RRL', 'FRW', 'MMP',
       'CMB', 'SLG', 'LFS', 'SGC', 'RGT', 'HR', 'PTW', 'GH', 'LM', 'MKP',
       'RF', 'TCP', 'PNR'], dtype=object)

In [8]:
# Label every sample with its "BGR" group, derived from the site code.
# The site code is mapped straight to the final label in a single pass, so the
# column never holds a mix of integers and strings.  A site code missing from
# the table yields NaN (standard Series.map behaviour).
SITE_TO_BGR = {
    # Sites outside the three named groups
    'BLD': 'other', 'SKY': 'other', 'KCK': 'other', 'WART': 'other',
    'SBR': 'other', 'GH': 'other', 'TCP': 'other', 'BTU': 'other',
    # Wintergreen
    'LM': 'Wintergreen', 'PTW': 'Wintergreen', 'RGT': 'Wintergreen',
    'RRL': 'Wintergreen', 'SGC': 'Wintergreen',
    # Cole Mountain
    'CMB': 'Cole Mountain', 'FRW': 'Cole Mountain', 'LFS': 'Cole Mountain',
    'MMP': 'Cole Mountain', 'SLG': 'Cole Mountain',
    # NOTE(review): HR used to carry its own numeric code before being
    # labelled 'Cole Mountain' - confirm it really belongs to this group.
    'HR': 'Cole Mountain',
    # Blacksburg
    'MKP': 'Blacksburg', 'PNR': 'Blacksburg', 'RF': 'Blacksburg',
}

tsampledf['BGR'] = tsampledf['site_id'].map(SITE_TO_BGR)

In [9]:
# Show the distinct BGR labels that the mapping produced.
tsampledf['BGR'].unique()

array(['other', 'Wintergreen', 'Cole Mountain', 'Blacksburg'],
      dtype=object)

In [10]:
# Bulk-load tSample.  Each :name placeholder is resolved by looking up the key
# `name` in the row dict, so the placeholders must match the DataFrame column
# names.  Note that the frame's `date` column is what fills collect_date.
sql = """
INSERT INTO tSample (sample_id, site_id, BGR, collect_date) VALUES (:sample_id, :site_id, :BGR, :date)
;"""

# One executemany call binds every row dict in turn (same result as looping
# over execute); the trailing ';' keeps the cursor repr out of the cell output.
curs.executemany(sql, tsampledf.to_dict(orient='records'));

In [11]:
# Sanity check: print what actually landed in tSample.
if checking_views:
    print(pd.read_sql("SELECT * FROM tSample;", conn))

    sample_id site_id         BGR collect_date
0      BLDTT0     BLD       other    6/29/2020
1      BLDTT1     BLD       other    6/29/2020
2      BLDTT2     BLD       other    6/29/2020
3      BLDTT3     BLD       other    6/29/2020
4      BLDTT4     BLD       other    6/29/2020
..        ...     ...         ...          ...
435     PNR12     PNR  Blacksburg     7/5/2021
436     PNR13     PNR  Blacksburg     7/5/2021
437     PNR14     PNR  Blacksburg     7/5/2021
438     PNR15     PNR  Blacksburg     7/5/2021
439     PNR16     PNR  Blacksburg     7/5/2021

[440 rows x 4 columns]


## Create Plant Morphology Table

In [12]:
# Translate the field-sheet column headers into the column names used by the
# tMorphology table, then load the morphology CSV with those names applied.
morph_column_names = {
    'Ht(cm)': 'plant_ht',
    '# Leaves': 'num_leaves',
    'Length LL (cm)': 'longest_leaf_cm',
    'Width LL (cm)': 'widest_leaf_cm',
    'Stem Diameter (mm)': 'stem_diam_mm',
    '# Inflorescences': 'num_inflores',
    '# Flowers Per (x2)': 'num_flowers',
    'Herbivory': 'herbivory',
    'Species': 'species',
}
tmorphdf = pd.read_csv('sql_data_files/complete_morphology.csv').rename(columns=morph_column_names)

In [13]:
# (Re)create tMorphology and load it from tmorphdf.
# The table is dropped first so that the cell can be run more than once.
# NOTE(review): unlike the other per-sample tables, sample_id here carries no
# REFERENCES tSample(sample_id) - confirm whether that is intentional.
curs.execute("DROP TABLE IF EXISTS tMorphology;")

sql = """
CREATE TABLE tMorphology (
    sample_id TEXT PRIMARY KEY,
    species TEXT,
    plant_ht REAL,
    num_leaves INTEGER,
    longest_leaf_cm REAL,
    widest_leaf_cm REAL,
    stem_diam_mm REAL,
    num_inflores INTEGER,
    num_flowers INTEGER,
    herbivory TEXT
)
;"""
curs.execute(sql)

# INSERT INTO (column names) VALUES (named parameters).
# A leading ":" marks a named parameter.  The parameter names need not equal
# the column names - the n-th parameter feeds the n-th listed column - but
# keeping them identical keeps the statement readable.
sql = """
INSERT INTO tMorphology (sample_id, species, plant_ht, num_leaves,
    longest_leaf_cm, widest_leaf_cm, stem_diam_mm, num_inflores,
    num_flowers, herbivory) VALUES (:sample_id, :species, :plant_ht, :num_leaves,
    :longest_leaf_cm, :widest_leaf_cm, :stem_diam_mm, :num_inflores,
    :num_flowers, :herbivory)
;"""

# to_dict(orient='records') yields one {column name: value} dict per DataFrame
# row.  sqlite3 resolves each :name by looking up that key in the dict, so the
# DataFrame column names must match the parameter names above (for example,
# :plant_ht is filled from row['plant_ht']).
# executemany binds every row dict in turn; the trailing ';' keeps the cursor
# repr out of the cell output.
curs.executemany(sql, tmorphdf.to_dict(orient='records'));

In [14]:
# Sanity check: print what actually landed in tMorphology.
if checking_views:
    print(pd.read_sql("SELECT * FROM tMorphology;", conn))

    sample_id  species  plant_ht num_leaves longest_leaf_cm  widest_leaf_cm  \
0      BLDTT0     None      93.3         22            16.5             7.3   
1      BLDTT1     None      70.7         14            15.4             7.1   
2      BLDTT2     None      83.4         18            15.6             7.4   
3      BLDTT3     None      44.9          9             7.5             3.6   
4      BLDTT4     None      78.8         17            13.9             7.2   
..        ...      ...       ...        ...             ...             ...   
435     PNR12     None       NaN       None            None             NaN   
436     PNR13  syriaca     144.0         22            None             NaN   
437     PNR14   hybrid     150.0         14            None             NaN   
438     PNR15  syriaca     146.0         13            None             NaN   
439     PNR16  syriaca     172.0         24            None             NaN   

     stem_diam_mm num_inflores   num_flowers herbiv

## Create Soil Nutrients Table

In [15]:
# Schema for the soil chemistry results, keyed by sample_id.
# Column legend (names as they appear in the source report):
#   OM  - Organic Matter % Rate
#   ENR - Estimated Nitrogen Release (lbs/A)
#   CEC - Cation Exchange Capacity (meq/100g)
#   pH  - pH
#   BpH - Buffer pH
#   Phosphorus, Potassium, Calcium, Magnesium, Sulfur, Sodium, Zinc,
#   Manganese, Iron, Copper, Boron - all in ppm
sql = """
CREATE TABLE tSoilNutrients (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    OM REAL,
    ENR REAL,
    CEC REAL,
    pH REAL,
    BpH REAL,
    Phosphorus REAL,
    Potassium REAL,
    Calcium REAL,
    Magnesium REAL,
    Sulfur REAL,
    Sodium REAL,
    Zinc REAL,
    Manganese REAL,
    Iron REAL,
    Copper REAL,
    Boron REAL,
    PRIMARY KEY (sample_id)
)
;"""

# Remove any earlier copy of the table so the cell can be re-run, then build it.
curs.execute("DROP TABLE IF EXISTS tSoilNutrients;")
curs.execute(sql)

<sqlite3.Cursor at 0x18ea4336740>

In [16]:
# Load the soil nutrient results, discard the `site` column (the table only
# stores sample_id plus the measurements) and keep the first row for each
# sample_id.
tsoilnutrientdf = (
    pd.read_csv('sql_data_files/soil_nutrients.csv')
    .drop(columns='site')
    .drop_duplicates('sample_id')
)
tsoilnutrientdf.tail()

Unnamed: 0,sample_id,OM,ENR,CEC,pH,BpH,Phosphorus,Potassium,Calcium,Magnesium,Sulfur,Sodium,Zinc,Manganese,Iron,Copper,Boron
418,PNR13,5.8,142,20.4,7.7,6.93,21,78,3777,155,12,13,3.7,98,92,1.1,1.1
419,PNR14,4.2,110,22.3,7.6,6.93,9,45,4217,131,9,13,2.1,36,156,1.2,0.5
420,PNR15,3.6,98,18.7,7.7,6.93,8,42,3472,143,13,13,2.5,51,135,1.2,0.7
421,PNR8,3.7,100,17.9,7.6,6.93,12,37,3335,124,9,14,1.5,35,59,0.7,0.5
422,PNR9,3.7,100,18.7,7.5,6.93,18,48,3457,147,7,13,1.6,39,62,0.8,0.7


In [17]:
# Clean the sample_id column before loading tSoilNutrients.
raw_ids = tsoilnutrientdf['sample_id']

# 1. Discard rows whose id starts with '8' as well as the stray 'BLDRV1-' row,
#    then keep the first row per id.  These filters are applied
#    unconditionally, i.e. also when no id happens to start with '8'.
#    na=False treats a missing id as "does not start with '8'".
unwanted = raw_ids.str.startswith('8', na=False) | raw_ids.eq('BLDRV1-')
tsoilnutrientdf = (
    tsoilnutrientdf.loc[~unwanted]
    .drop_duplicates(subset="sample_id", keep='first')
)

# 2. Strip at most one leading '1' and then at most one leading space
#    (so '1PNR8', ' PNR8' and '1 PNR8' all become 'PNR8').  The pattern is
#    anchored at the start, and empty or missing ids pass through untouched
#    instead of raising.
tsoilnutrientdf = tsoilnutrientdf.assign(
    sample_id=tsoilnutrientdf['sample_id'].str.replace(r'^1? ?', '', regex=True)
)

soil_list = tsoilnutrientdf['sample_id'].tolist()
print(len(soil_list))

# Stripping prefixes can make two ids collide; fail here with a clear message
# rather than later on the table's PRIMARY KEY.
assert tsoilnutrientdf['sample_id'].is_unique, "duplicate sample_id after prefix clean-up"

tsoilnutrientdf.tail()

368


Unnamed: 0,sample_id,OM,ENR,CEC,pH,BpH,Phosphorus,Potassium,Calcium,Magnesium,Sulfur,Sodium,Zinc,Manganese,Iron,Copper,Boron
418,PNR13,5.8,142,20.4,7.7,6.93,21,78,3777,155,12,13,3.7,98,92,1.1,1.1
419,PNR14,4.2,110,22.3,7.6,6.93,9,45,4217,131,9,13,2.1,36,156,1.2,0.5
420,PNR15,3.6,98,18.7,7.7,6.93,8,42,3472,143,13,13,2.5,51,135,1.2,0.7
421,PNR8,3.7,100,17.9,7.6,6.93,12,37,3335,124,9,14,1.5,35,59,0.7,0.5
422,PNR9,3.7,100,18.7,7.5,6.93,18,48,3457,147,7,13,1.6,39,62,0.8,0.7


In [18]:
# INSERT INTO (column names) VALUES (named parameters).
# A leading ":" marks a named parameter.  The parameter names need not equal
# the column names - the n-th parameter feeds the n-th listed column - but
# keeping them identical keeps the statement readable.
sql = """
INSERT INTO tSoilNutrients (sample_id, OM, ENR, CEC, pH, BpH,
    Phosphorus, Potassium, Calcium, Magnesium, Sulfur, Sodium,
    Zinc, Manganese, Iron, Copper, Boron) VALUES (:sample_id, :OM, :ENR, :CEC, :pH, :BpH,
    :Phosphorus, :Potassium, :Calcium, :Magnesium, :Sulfur, :Sodium,
    :Zinc, :Manganese, :Iron, :Copper, :Boron)
;"""

# to_dict(orient='records') yields one {column name: value} dict per DataFrame
# row.  sqlite3 resolves each :name by looking up that key in the dict, so the
# DataFrame column names must match the parameter names above (for example,
# :sample_id is filled from row['sample_id']).
# executemany binds every row dict in turn.
curs.executemany(sql, tsoilnutrientdf.to_dict(orient='records'))

# Display the stored rows as a sanity check.
pd.read_sql("SELECT * FROM tSoilNutrients;", conn)

Unnamed: 0,sample_id,OM,ENR,CEC,pH,BpH,Phosphorus,Potassium,Calcium,Magnesium,Sulfur,Sodium,Zinc,Manganese,Iron,Copper,Boron
0,BLDTT0,4.3,124.0,8.1,6.8,6.91,18.0,180.0,1193.0,178.0,3.0,7.0,2.3,321.0,78.0,1.5,0.5
1,BLDTT11,4.7,135.0,6.6,6.0,6.83,22.0,195.0,781.0,138.0,5.0,9.0,1.9,85.0,76.0,1.0,0.4
2,BLDTT12,4.6,133.0,6.6,6.1,6.84,23.0,266.0,770.0,131.0,5.0,9.0,1.6,78.0,82.0,0.9,0.3
3,BLDTT13,4.9,138.0,6.8,6.1,6.84,24.0,270.0,741.0,175.0,1.0,9.0,2.2,76.0,80.0,1.1,0.3
4,BLDTT14,5.4,148.0,7.3,5.8,6.79,32.0,229.0,766.0,172.0,8.0,9.0,2.6,64.0,87.0,1.0,0.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
363,PNR13,5.8,142.0,20.4,7.7,6.93,21.0,78.0,3777.0,155.0,12.0,13.0,3.7,98.0,92.0,1.1,1.1
364,PNR14,4.2,110.0,22.3,7.6,6.93,9.0,45.0,4217.0,131.0,9.0,13.0,2.1,36.0,156.0,1.2,0.5
365,PNR15,3.6,98.0,18.7,7.7,6.93,8.0,42.0,3472.0,143.0,13.0,13.0,2.5,51.0,135.0,1.2,0.7
366,PNR8,3.7,100.0,17.9,7.6,6.93,12.0,37.0,3335.0,124.0,9.0,14.0,1.5,35.0,59.0,0.7,0.5


## Create Plant Microbe Tables

In [19]:
# Build one leaf-microbiome table per taxonomic rank.  Every table has the
# same layout - (sample_id, taxa, relative_abundance), keyed on the
# (sample_id, taxa) pair - and is filled from the CSV named after the rank.
taxon_list = ['Class', 'Family', 'Genus', 'Order', 'Phylum', 'Species', 'Strain', 'Superkingdom']

for taxon_level in taxon_list:
    print(taxon_level)
    # Table names come only from the fixed list above, so formatting them
    # into the SQL text is safe (identifiers cannot be bound as parameters).
    table_name = 'tPlantMicrobe' + taxon_level

    curs.execute(f"DROP TABLE IF EXISTS {table_name};")
    curs.execute(f"""
    CREATE TABLE {table_name} (
        sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
        taxa TEXT,
        relative_abundance REAL,
        PRIMARY KEY (sample_id, taxa)
    )
    ;""")

    # NOTE(review): the saved output shows pandas emitting a warning on this
    # read for the Strain file (presumably a mixed-dtype column) and the
    # Species and Strain files reporting the same shape - worth confirming
    # that the Strain export is what it should be.
    microbe_df = pd.read_csv(f'sql_data_files/leaf_microbiome_{taxon_level.lower()}.csv')
    print(microbe_df.shape)

    # :sample_id, :taxon and :val are looked up by key in each row dict, so
    # the CSV must provide columns with exactly those names.
    insert_sql = f"""
    INSERT INTO {table_name} (sample_id, taxa, relative_abundance)
    VALUES (:sample_id, :taxon, :val)
    ;"""

    # One executemany call per table instead of one execute call per row.
    curs.executemany(insert_sql, microbe_df.to_dict(orient='records'))

Class
(6438, 4)
Family
(23710, 4)
Genus
(48534, 4)
Order
(13573, 4)
Phylum
(3023, 4)
Species
(178091, 4)
Strain


  PMdf = pd.read_csv('sql_data_files/leaf_microbiome_' + taxon_level.lower() + '.csv')


(178091, 4)
Superkingdom
(756, 4)


## Create Plant Nutrients Table

In [20]:
# Schema for the plant nutrient results, keyed by sample_id (which must exist
# in tSample).  Column names carry their unit suffix: *_percent_dm or *_ppm_dm.
sql = """
CREATE TABLE tPlantNutrients (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    N_percent_dm REAL,
    P_percent_dm REAL,
    K_percent_dm REAL,
    Ca_percent_dm REAL,
    Mg_percent_dm REAL,
    Na_percent_dm REAL,
    S_percent_dm REAL,
    Zn_ppm_dm REAL,
    Mn_ppm_dm REAL,
    Fe_ppm_dm REAL,
    Cu_ppm_dm REAL,
    B_ppm_dm REAL,
    Al_ppm_dm REAL,
    PRIMARY KEY (sample_id)
)
;"""

# Remove any earlier copy of the table so the cell can be re-run, then build it.
curs.execute("DROP TABLE IF EXISTS tPlantNutrients;")
curs.execute(sql)

<sqlite3.Cursor at 0x18ea4336740>

In [21]:
# Load the leaf nutrient measurements that feed tPlantNutrients.
tleafnutrientdf = pd.read_csv('sql_data_files/leaf_nutrients.csv')

In [22]:
# Bulk-load tPlantNutrients.  Each :name placeholder is resolved by looking up
# the key `name` in the row dict, so the placeholders must match the DataFrame
# column names - note the capitalised :Sample_ID, which fills sample_id.
sql = """
INSERT INTO tPlantNutrients (sample_id, N_percent_dm, P_percent_dm, K_percent_dm, Ca_percent_dm, 
Mg_percent_dm, Na_percent_dm, S_percent_dm, Zn_ppm_dm, Mn_ppm_dm, Fe_ppm_dm, Cu_ppm_dm, B_ppm_dm, Al_ppm_dm)
VALUES (:Sample_ID, :N_percent_dm, :P_percent_dm, :K_percent_dm, :Ca_percent_dm, 
:Mg_percent_dm, :Na_percent_dm, :S_percent_dm, :Zn_ppm_dm, :Mn_ppm_dm, :Fe_ppm_dm, :Cu_ppm_dm, :B_ppm_dm, :Al_ppm_dm)
;"""

# to_dict(orient='records') yields one {column name: value} dict per row;
# executemany binds each of them in turn.  The trailing ';' keeps the cursor
# repr out of the cell output.
curs.executemany(sql, tleafnutrientdf.to_dict(orient='records'));

In [23]:
# Sanity check: display the rows that were loaded into tPlantNutrients.
pd.read_sql("SELECT * FROM tPlantNutrients;", conn)

Unnamed: 0,sample_id,N_percent_dm,P_percent_dm,K_percent_dm,Ca_percent_dm,Mg_percent_dm,Na_percent_dm,S_percent_dm,Zn_ppm_dm,Mn_ppm_dm,Fe_ppm_dm,Cu_ppm_dm,B_ppm_dm,Al_ppm_dm
0,RRL1,,0.21,4.01,0.83,0.29,0.01,0.37,29.0,281.0,234.0,8.0,24.0,130.0
1,RRL2,2.24,0.16,5.34,1.05,0.32,0.01,0.29,20.0,423.0,76.0,5.0,27.0,36.0
2,RRL3,,0.20,4.75,0.63,0.27,0.04,0.22,21.0,469.0,68.0,5.0,32.0,18.0
3,RRL4,,0.18,4.74,1.25,0.32,0.01,0.28,18.0,680.0,78.0,6.0,36.0,27.0
4,RRL5,3.00,0.26,3.51,1.20,0.44,0.01,0.29,29.0,507.0,66.0,7.0,23.0,22.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218,PNR11,3.34,0.48,3.56,1.50,0.36,0.01,0.28,24.0,90.0,53.0,14.0,33.0,18.0
219,PNR13,1.91,0.21,1.55,2.33,0.55,0.01,0.35,13.0,39.0,103.0,5.0,21.0,40.0
220,PNR14,2.91,0.24,2.32,2.03,0.53,0.08,0.32,14.0,71.0,67.0,6.0,25.0,21.0
221,PNR15,2.72,0.22,2.13,2.40,0.60,0.06,0.38,12.0,30.0,50.0,6.0,34.0,26.0


## Create Soil Microbe Tables

In [24]:
# Build one soil-microbiome table per taxonomic rank.  Every table has the
# same layout - (sample_id, taxa, relative_abundance), keyed on the
# (sample_id, taxa) pair - and is filled from the CSV named after the rank.
taxon_list = ['Class', 'Family', 'Genus', 'Order', 'Phylum', 'Species', 'Strain', 'Superkingdom']

for taxon_level in taxon_list:
    print(taxon_level)
    # Table names come only from the fixed list above, so formatting them
    # into the SQL text is safe (identifiers cannot be bound as parameters).
    table_name = 'tSoilMicrobe' + taxon_level

    curs.execute(f"DROP TABLE IF EXISTS {table_name};")
    curs.execute(f"""
    CREATE TABLE {table_name} (
        sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
        taxa TEXT,
        relative_abundance REAL,
        PRIMARY KEY (sample_id, taxa)
    )
    ;""")

    # NOTE(review): the saved output reports the same shape for the Species
    # and Strain files - worth confirming that the Strain export is what it
    # should be.
    microbe_df = pd.read_csv(f'sql_data_files/soil_microbiome_{taxon_level.lower()}.csv')
    print(microbe_df.shape)

    # :sample_id, :taxon and :val are looked up by key in each row dict, so
    # the CSV must provide columns with exactly those names.
    insert_sql = f"""
    INSERT INTO {table_name} (sample_id, taxa, relative_abundance)
    VALUES (:sample_id, :taxon, :val)
    ;"""

    # One executemany call per table instead of one execute call per row.
    curs.executemany(insert_sql, microbe_df.to_dict(orient='records'))

Class
(7323, 4)
Family
(29633, 4)
Genus
(96166, 4)
Order
(14409, 4)
Phylum
(5141, 4)
Species
(409153, 4)
Strain
(409153, 4)
Superkingdom
(364, 4)


## Create Plant DNA Table

In [25]:
# Load the plant DNA results that feed tPlantDNA.
plant_dna = pd.read_csv('sql_data_files/final_plant_dna.csv')

In [26]:
# Preview the first rows; the INSERT further down binds the Name, Species,
# V1/V2 and PC1-PC5 columns by name.
plant_dna.head()

Unnamed: 0.1,Unnamed: 0,V1,V2,Name,Species,PC1,PC2,PC3,PC4,PC5
0,0,0.753244,0.246756,RRL1,exaltata,0.194769,0.048276,-0.02388,-0.017138,-0.039842
1,1,0.581366,0.418634,RRL9,exaltata,0.097719,0.094733,-0.00656,0.012822,-0.022248
2,2,0.591513,0.408487,RRL17,exaltata,0.099527,0.091996,0.004098,-0.021347,0.00719
3,3,0.048066,0.951934,FRW3,syriaca,-0.139465,-0.064022,-0.009963,-0.047536,0.024757
4,4,0.003946,0.996054,FRW12,syriaca,-0.155135,0.055455,0.009907,-0.043334,-0.009337


In [27]:
# Schema for the plant DNA results, keyed by sample_id (which must exist in
# tSample): the V1/V2 values, the species label and the PC1-PC5 values.
sql = """
CREATE TABLE tPlantDNA (
    sample_id TEXT NOT NULL REFERENCES tSample(sample_id),
    V1 REAL,
    V2 REAL,
    species TEXT,
    PC1 REAL,
    PC2 REAL,
    PC3 REAL,
    PC4 REAL,
    PC5 REAL,
    PRIMARY KEY (sample_id)
)
;"""

# Remove any earlier copy of the table so the cell can be re-run, then build it.
curs.execute("DROP TABLE IF EXISTS tPlantDNA;")
curs.execute(sql)

<sqlite3.Cursor at 0x18ea4336740>

In [28]:
# Bulk-load tPlantDNA.  Each :name placeholder is resolved by looking up the
# key `name` in the row dict, so the placeholders follow the DataFrame's
# column names: `Name` fills sample_id and `Species` fills species.
sql = """
INSERT INTO tPlantDNA (
    sample_id,
    V1,
    V2,
    species,
    PC1,
    PC2,
    PC3,
    PC4,
    PC5
)
VALUES (
    :Name,
    :V1,
    :V2,
    :Species,
    :PC1,
    :PC2,
    :PC3,
    :PC4,
    :PC5
)
;"""

# to_dict(orient='records') yields one {column name: value} dict per row;
# executemany binds each of them in turn.  The trailing ';' keeps the cursor
# repr out of the cell output.
curs.executemany(sql, plant_dna.to_dict(orient='records'));

In [29]:
# Commit the pending transaction so the inserted rows are saved to the database file.
conn.commit()

In [30]:
# Catalog query: the name of every table recorded in sqlite_master.
sql_query = """SELECT name FROM sqlite_master  
  WHERE type='table';"""

In [31]:
# Run the catalog query; the rows are fetched in the next cell.
curs.execute(sql_query)

<sqlite3.Cursor at 0x18ea4336740>

In [32]:
# Print the table names returned by the catalog query.
print(curs.fetchall())

[('tSample',), ('tMorphology',), ('tSoilNutrients',), ('tPlantMicrobeClass',), ('tPlantMicrobeFamily',), ('tPlantMicrobeGenus',), ('tPlantMicrobeOrder',), ('tPlantMicrobePhylum',), ('tPlantMicrobeSpecies',), ('tPlantMicrobeStrain',), ('tPlantMicrobeSuperkingdom',), ('tPlantNutrients',), ('tSoilMicrobeClass',), ('tSoilMicrobeFamily',), ('tSoilMicrobeGenus',), ('tSoilMicrobeOrder',), ('tSoilMicrobePhylum',), ('tSoilMicrobeSpecies',), ('tSoilMicrobeStrain',), ('tSoilMicrobeSuperkingdom',), ('tPlantDNA',)]


In [33]:
# Read tSample back from the database as a round-trip check.
testdf = pd.read_sql("SELECT * FROM tSample;", conn)

In [34]:
# Preview the first rows read back from tSample.
testdf.head()

Unnamed: 0,sample_id,site_id,BGR,collect_date
0,BLDTT0,BLD,other,6/29/2020
1,BLDTT1,BLD,other,6/29/2020
2,BLDTT2,BLD,other,6/29/2020
3,BLDTT3,BLD,other,6/29/2020
4,BLDTT4,BLD,other,6/29/2020


In [35]:
# (rows, columns) of the frame read back from tSample.
testdf.shape

(440, 4)