# The Letters of Libanius

In [1]:
#Import libraries and read CSVs

import pandas as pd
import sqlite3

#All seven tables are published in the same GitHub folder, so the shared prefix is kept
#in one constant: pointing the notebook at another branch or fork is then a one-line change
CSV_BASE_URL = "https://raw.githubusercontent.com/isaw-ga-3024/isaw-ga-3024.github.io/master/Papadimitriou-Mikael-mikepnyu/Libanius/Tables/CSV/"

#One DataFrame per CSV file; the variable names are reused as SQL table names later on
letter_table = pd.read_csv(CSV_BASE_URL + "Letter_Table.csv")
recipient_table = pd.read_csv(CSV_BASE_URL + "Recipient_Table.csv")
carrier_table = pd.read_csv(CSV_BASE_URL + "Carrier_Table.csv")
translation_table = pd.read_csv(CSV_BASE_URL + "Translation_Table.csv")
type_table = pd.read_csv(CSV_BASE_URL + "Type_Table.csv")
destination_table = pd.read_csv(CSV_BASE_URL + "Destination_Table.csv")
#The mapping table links letters to types (a many-to-many relationship)
mapping_table = pd.read_csv(CSV_BASE_URL + "Mapping_Table(Letter+Type).csv")


In [2]:
#Sanity check before going into sql: print each dataframe's name followed by its
#first two rows (every heading after the first starts with a blank line)
for heading, frame in [
    ("letter_table", letter_table),
    ("\nrecipient_table", recipient_table),
    ("\ncarrier_table", carrier_table),
    ("\ntranslation_table", translation_table),
    ("\ntype_table", type_table),
    ("\ndestination_table", destination_table),
    ("\nmapping_table", mapping_table),
]:
    print(heading)
    print(frame.head(2))

letter_table
   Letter_ID  Date_Min  Date_Max     Destination  Recipient_ID  Carrier_ID
0        399       355       355  Constantinople             1         NaN
1        560       357       357  Constantinople             1         1.0

recipient_table
   Recipient_ID Recipient_Name       PLRE_ID  BLZG_ID
0             1     Andronicus  Andronicus 3      NaN
1             2       Hypatius    Hypatius 1      NaN

carrier_table
   Carrier_ID Carrier_Name PLRE_ID      BLZG_ID
0           1    Majorinus     NaN          NaN
1           2    Auxentius     NaN  Auxentius v

translation_table
   Letter_ID  Norman  Cabouret_2000 Bradbury_2004  Cribiore_2007
0        399     NaN            NaN           B86            NaN
1        560     NaN            NaN           B87            NaN

type_table
   Type_ID            Name                                        Description
0        1  Recommendation  A letter that recommends an individual for a p...
1        2          School  A letter that 

In [3]:
#Open a connection to a SQLite database held in memory (nothing is written to disk)
conn = sqlite3.connect(database=":memory:")

In [4]:
#Copy every dataframe into the database as a table of the same name
#if_exists="replace" overwrites a table that is already there, so this cell can be re-run
letter_table.to_sql(name='letter_table', con=conn, if_exists="replace")
recipient_table.to_sql(name='recipient_table', con=conn, if_exists="replace")
carrier_table.to_sql(name='carrier_table', con=conn, if_exists="replace")
translation_table.to_sql(name='translation_table', con=conn, if_exists="replace")
type_table.to_sql(name='type_table', con=conn, if_exists="replace")
destination_table.to_sql(name='destination_table', con=conn, if_exists="replace")
mapping_table.to_sql(name='mapping_table', con=conn, if_exists="replace")

# Problems with the datatype of certain columns

In [5]:
#First look at the column datatypes sqlite assigned to letter_table
#Carrier_ID comes out as REAL instead of INTEGER: the column contains missing values (NaN),
#so pandas holds it as float64 and that type carries over into sql
pd.read_sql(sql="PRAGMA table_info(letter_table)", con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,index,INTEGER,0,,0
1,1,Letter_ID,INTEGER,0,,0
2,2,Date_Min,INTEGER,0,,0
3,3,Date_Max,INTEGER,0,,0
4,4,Destination,TEXT,0,,0
5,5,Recipient_ID,INTEGER,0,,0
6,6,Carrier_ID,REAL,0,,0


In [6]:
#Attempt to change the datatype of the column Carrier_ID directly in the database

pd.read_sql("""
ALTER TABLE letter_table
ALTER COLUMN Carrier_ID INTEGER
""",conn)

#This results in a DatabaseError (syntax error near "ALTER"): sqlite3 does not accept
#the ALTER COLUMN clause used here, so the column type cannot be changed this way
#There is apparently no easy way out of this problem in sqlite

ERROR: An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line string', (1, 0))



DatabaseError: Execution failed on sql '
ALTER TABLE letter_table
ALTER COLUMN Carrier_ID INTEGER
': near "ALTER": syntax error

In [7]:
#Go back to pandas and inspect the datatypes of the dataframe itself

letter_table.dtypes

Letter_ID         int64
Date_Min          int64
Date_Max          int64
Destination      object
Recipient_ID      int64
Carrier_ID      float64
dtype: object

In [8]:
#Change datatype in pandas from float to int (first attempt)

letter_table[['Carrier_ID']] = letter_table[['Carrier_ID']].astype(int)

#This raises a ValueError: the column still contains NaN (letters without a carrier)
#and pandas cannot convert NA to a plain integer
#The missing values have to be replaced (here with zeros) before the conversion can work
#NOTE(review): a nullable integer dtype might avoid the need for a zero placeholder - to be confirmed

ValueError: Cannot convert NA to integer

In [9]:
#Replace the missing carrier IDs (NaN) with 0 so that the column can be converted to integers
letter_table['Carrier_ID'] = letter_table['Carrier_ID'].fillna(value=0)

In [10]:
#Second attempt at the float to int conversion, now that the NaN have been filled in
#"int64" is spelled out on purpose: using the type "int" rendered the column int32 instead of int64
letter_table['Carrier_ID'] = letter_table['Carrier_ID'].astype("int64")

In [11]:
#Check that the datatype of Carrier_ID has actually changed

print(letter_table.dtypes)

Letter_ID        int64
Date_Min         int64
Date_Max         int64
Destination     object
Recipient_ID     int64
Carrier_ID       int64
dtype: object


In [12]:
#Preview the first three rows in pandas to see how Carrier_ID looks after the conversion

print(letter_table.head(3))

   Letter_ID  Date_Min  Date_Max     Destination  Recipient_ID  Carrier_ID
0        399       355       355  Constantinople             1           0
1        560       357       357  Constantinople             1           1
2        156       360       360       Phoenicia             1           2


In [13]:
#Write the dataframes to the database again so that sql sees the corrected Carrier_ID column
#if_exists="replace" overwrites the tables loaded earlier
letter_table.to_sql(name='letter_table', con=conn, if_exists="replace")
recipient_table.to_sql(name='recipient_table', con=conn, if_exists="replace")
carrier_table.to_sql(name='carrier_table', con=conn, if_exists="replace")
translation_table.to_sql(name='translation_table', con=conn, if_exists="replace")
type_table.to_sql(name='type_table', con=conn, if_exists="replace")
destination_table.to_sql(name='destination_table', con=conn, if_exists="replace")
mapping_table.to_sql(name='mapping_table', con=conn, if_exists="replace")

In [14]:
#Second look at the column datatypes in sql: Carrier_ID should now be INTEGER, as intended
pd.read_sql(sql="PRAGMA table_info(letter_table)", con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,index,INTEGER,0,,0
1,1,Letter_ID,INTEGER,0,,0
2,2,Date_Min,INTEGER,0,,0
3,3,Date_Max,INTEGER,0,,0
4,4,Destination,TEXT,0,,0
5,5,Recipient_ID,INTEGER,0,,0
6,6,Carrier_ID,INTEGER,0,,0


# Back to our regularly scheduled program

The following cells attempt to interact with the data in meaningful ways 
to show the potential of the database.

In [15]:
#Display basic information about the letters by joining two tables (letter_table and recipient_table).
#The tables are matched on Recipient_ID with an explicit JOIN ... ON, which keeps the join
#condition next to the table it applies to instead of in the WHERE clause

pd.read_sql("""
SELECT letter_table.Letter_ID, letter_table.Date_Min, letter_table.Date_Max, letter_table.Destination, recipient_table.Recipient_Name, recipient_table.PLRE_ID
FROM letter_table
JOIN recipient_table ON letter_table.Recipient_ID = recipient_table.Recipient_ID
ORDER BY letter_table.Letter_ID
""", conn)

Unnamed: 0,Letter_ID,Date_Min,Date_Max,Destination,Recipient_Name,PLRE_ID
0,156,360,360,Phoenicia,Andronicus,Andronicus 3
1,158,360,360,Palestina Prima,Hypatius,Hypatius 1
2,159,360,360,Phoenicia,Andronicus,Andronicus 3
3,166,360,360,Phoenicia,Andronicus,Andronicus 3
4,175,360,360,Phoenicia,Andronicus,Andronicus 3
5,399,355,355,Constantinople,Andronicus,Andronicus 3
6,560,357,357,Constantinople,Andronicus,Andronicus 3
7,838,363,363,,Alexander,Alexander 5
8,1357,363,363,Apamea,Marcellinus,
9,1460,363,363,Phoenicia,Andronicus,Andronicus 3


Since not all letters had carriers, I could not simply join the carrier_table with the other two: the letters without a carrier had no matching row and would have been left out of the result.
I had to add a "None" row to the carrier_table, so that the newly added zeros in Carrier_ID can refer to something in that table.
For some reason, the INSERT statement would not cooperate in sqlite, so I had to modify the CSV directly.

In [16]:
#Display more information by joining carrier_table to the other two
#The carrier is attached with a LEFT JOIN: a letter whose Carrier_ID has no matching row in
#carrier_table is still listed, with empty carrier columns, instead of being dropped
#NOTE(review): the result contains two columns named PLRE_ID (recipient, then carrier);
#consider aliasing them if the dataframe is used beyond display

pd.read_sql("""
SELECT letter_table.Letter_ID, letter_table.Date_Min, letter_table.Date_Max, letter_table.Destination, recipient_table.Recipient_Name,
    recipient_table.PLRE_ID, carrier_table.Carrier_Name, carrier_table.PLRE_ID, carrier_table.BLZG_ID
FROM letter_table
JOIN recipient_table ON letter_table.Recipient_ID = recipient_table.Recipient_ID
LEFT JOIN carrier_table ON letter_table.Carrier_ID = carrier_table.Carrier_ID
ORDER BY letter_table.Letter_ID
""", conn)

Unnamed: 0,Letter_ID,Date_Min,Date_Max,Destination,Recipient_Name,PLRE_ID,Carrier_Name,PLRE_ID.1,BLZG_ID
0,156,360,360,Phoenicia,Andronicus,Andronicus 3,Auxentius,,Auxentius v
1,158,360,360,Palestina Prima,Hypatius,Hypatius 1,,,
2,159,360,360,Phoenicia,Andronicus,Andronicus 3,,,
3,166,360,360,Phoenicia,Andronicus,Andronicus 3,,,
4,175,360,360,Phoenicia,Andronicus,Andronicus 3,Bassus,,Bassus ii
5,399,355,355,Constantinople,Andronicus,Andronicus 3,,,
6,560,357,357,Constantinople,Andronicus,Andronicus 3,Majorinus,,
7,838,363,363,,Alexander,Alexander 5,,,
8,1357,363,363,Apamea,Marcellinus,,,,
9,1460,363,363,Phoenicia,Andronicus,Andronicus 3,Marius,Marius 1,


In [17]:
#Where to find a translation of the letters dated to 360 AD or later (Date_Min >= 360)
#Every selected column is qualified with its table name, so the query stays unambiguous
#even if another table later gains a column with the same name

pd.read_sql("""
SELECT letter_table.Letter_ID, translation_table.Norman, translation_table.Cabouret_2000, translation_table.Bradbury_2004, translation_table.Cribiore_2007
FROM letter_table
JOIN translation_table ON letter_table.Letter_ID = translation_table.Letter_ID
WHERE letter_table.Date_Min >= 360
ORDER BY letter_table.Letter_ID
""", conn)


Unnamed: 0,Letter_ID,Norman,Cabouret_2000,Bradbury_2004,Cribiore_2007
0,156,,,B88,
1,158,,,B89,
2,159,,,B90,
3,166,,,B91,
4,175,,,B92,
5,838,,,B94,
6,1357,,,B95,
7,1460,,,B93,


In [18]:
#Display every type associated with each letter
#This is a many-to-many relationship that uses a mapping table to keep track of everything
#A description of every type in the typology is provided, although not displayed in its entirety.

pd.read_sql("""
SELECT letter_table.Letter_ID, type_table.Name, type_table.Description
FROM letter_table, type_table, mapping_table
WHERE (letter_table.Letter_ID = mapping_table.Letter_ID)
    AND (mapping_table.Type_ID = type_table.Type_ID)
ORDER BY letter_table.Letter_ID
""", conn)


Unnamed: 0,Letter_ID,Name,Description
0,156,Favour,A letter whose purpose is to obtain a favour f...
1,156,Introduction,A letter that introduces an individual to anot...
2,158,Personal,A letter whose purpose is to maintain his rela...
3,159,Personal,A letter whose purpose is to maintain his rela...
4,166,Favour,A letter whose purpose is to obtain a favour f...
5,175,Favour,A letter whose purpose is to obtain a favour f...
6,175,Introduction,A letter that introduces an individual to anot...
7,399,Personal,A letter whose purpose is to maintain his rela...
8,560,Recommendation,A letter that recommends an individual for a p...
9,838,School,A letter that deals with issues related to Lib...


In [19]:
#If we want to find only the letters of introduction
#The type name is written as a single-quoted string literal: in SQL, double quotes denote an
#identifier, and sqlite only treats "Introduction" as a string through a compatibility fallback
#The carrier is attached with a LEFT JOIN so that a letter without a matching carrier row is kept

pd.read_sql("""
SELECT letter_table.Letter_ID, letter_table.Date_Min, letter_table.Date_Max, letter_table.Destination, recipient_table.Recipient_Name,
    recipient_table.PLRE_ID, carrier_table.Carrier_Name, carrier_table.PLRE_ID, carrier_table.BLZG_ID
FROM letter_table
JOIN mapping_table ON letter_table.Letter_ID = mapping_table.Letter_ID
JOIN type_table ON mapping_table.Type_ID = type_table.Type_ID
JOIN recipient_table ON letter_table.Recipient_ID = recipient_table.Recipient_ID
LEFT JOIN carrier_table ON letter_table.Carrier_ID = carrier_table.Carrier_ID
WHERE type_table.Name = 'Introduction'
ORDER BY letter_table.Letter_ID
""", conn)

Unnamed: 0,Letter_ID,Date_Min,Date_Max,Destination,Recipient_Name,PLRE_ID,Carrier_Name,PLRE_ID.1,BLZG_ID
0,156,360,360,Phoenicia,Andronicus,Andronicus 3,Auxentius,,Auxentius v
1,175,360,360,Phoenicia,Andronicus,Andronicus 3,Bassus,,Bassus ii
2,1460,363,363,Phoenicia,Andronicus,Andronicus 3,Marius,Marius 1,
