# Database connection test

# Preparation

In [1]:
# Imports
import sqlite3
import pandas as pd

## Main connection

In [2]:
# Connect to main database
# The path is relative, so it resolves against the directory the notebook is run from.
# `conn` is the shared connection used by query() and by the to_sql() calls further down.
# NOTE(review): sqlite3.connect() presumably creates an empty file if the path does not
# exist - confirm the database is present before running the rest of the notebook.
db_path = '../TsakonianDB.sqlite3'
conn = sqlite3.connect(db_path)

## Auxiliary functions

In [3]:
def query(query: str,
          cursor = None):
    """Run one SQL statement and return its rows as a DataFrame.

    Parameters
    ----------
    query : str
        A single SQL statement to execute.
    cursor : sqlite3.Cursor, optional
        Cursor to execute on. When omitted, a fresh cursor is taken from the
        notebook-level ``conn`` at call time and closed afterwards.

    Returns
    -------
    pandas.DataFrame or None
        A frame whose columns are named after ``cursor.description`` when the
        statement produced at least one row; otherwise ``None`` (a short
        confirmation message is printed instead).
    """
    # Resolve the cursor when the function is called, not when it is defined:
    # a default of `conn.cursor()` in the signature is evaluated only once and
    # would keep pointing at the old connection after `conn` is re-created.
    owns_cursor = cursor is None
    if owns_cursor:
        cursor = conn.cursor()

    try:
        cursor.execute(query)
        fetched = cursor.fetchall()

        if len(fetched) > 0:
            # description holds one tuple per result column; item 0 is its name
            column_names = [col[0] for col in cursor.description]
            return pd.DataFrame(fetched, columns=column_names)

        print('Query executed successfully. No results to show.')
        return None
    finally:
        # Only close cursors created here; a caller-supplied cursor stays open
        if owns_cursor:
            cursor.close()

# Add words from main dictionary to the database

## Load dictionary

In [4]:
# Load main dictionary
# Read the word list from the Excel workbook into a DataFrame. The same path is
# reused later in the notebook when the cleaned table is written back to this file.
main_df_path = '../data/tables/main.xlsx'
main_df = pd.read_excel(main_df_path)
# Bare last expression: displays the loaded table as the cell output
main_df

Unnamed: 0,tsakonian,greek,paradigm,source_id
0,άγιε,εκκλησία,Α4,
1,άγο,άλογο,Α0,1.0
2,άγουστε,αύγουστος,,1.0
3,άζ̌α,"αραία, όχι συχνά",,1.0
4,άθρωπο,άνθροπος,Α6,1.0
...,...,...,...,...
807,όρπα,εκεί,Ζ,1.0
808,όσ̌ου,όσο,Ζ,1.0
809,όταν,όταν,Ζ,1.0
810,ότσι,ότι,Ζ,1.0


## Manage duplicates

In [5]:
# Remove duplicates with the exact same information
# drop_duplicates() is called without a subset, so rows are compared on every column;
# the row count is printed before and after to show how many rows were dropped.
print(f'Entries before removing duplicates: {len(main_df)}')
main_df = main_df.drop_duplicates()
print(f'Entries after removing duplicates: {len(main_df)}')

Entries before removing duplicates: 812
Entries after removing duplicates: 812


In [6]:
# List every entry whose Tsakonian headword appears more than once
# (keep=False flags all occurrences, not just the repeats), grouped by headword
main_df.loc[main_df['tsakonian'].duplicated(keep=False)].sort_values('tsakonian')

Unnamed: 0,tsakonian,greek,paradigm,source_id
168,βου,βόδι,Α2,1.0
169,βου,κλαίω,Ρ,1.0
342,κηρούνι,πηρούνι,,1.0
343,κηρούνι,πηρούνι,Υ3,1.0
629,σερικ̇ή,ιούνιος,,1.0
630,σερικ̇ή,θεριστής,Α,1.0


In [7]:
# Keep the Tsakonian-Greek duplicate with the lowest ID
# Rows are first put in index order so that keep='first' retains the lowest ID
# of each (tsakonian, greek) pair, then the table is sorted by headword and
# given a fresh 0..n-1 index.
print(f'Entries before removing duplicates: {len(main_df)}')
main_df = (main_df
           .sort_index()
           .drop_duplicates(subset = ['tsakonian', 'greek'], keep = 'first')
           # Use a stable sort: the default quicksort does not guarantee the
           # relative order of equal keys, so homographs (same headword,
           # different meaning) could otherwise swap places between runs of
           # this cell and change the row order written back to Excel.
           .sort_values(by = 'tsakonian', kind = 'mergesort')
           .reset_index(drop = True)
           )
print(f'Entries after removing duplicates: {len(main_df)}')
main_df

Entries before removing duplicates: 812
Entries after removing duplicates: 812


Unnamed: 0,tsakonian,greek,paradigm,source_id
0,άγιε,εκκλησία,Α4,
1,άγο,άλογο,Α0,1.0
2,άγουστε,αύγουστος,,1.0
3,άζ̌α,"αραία, όχι συχνά",,1.0
4,άθρωπο,άνθροπος,Α6,1.0
...,...,...,...,...
807,όρπα,εκεί,Ζ,1.0
808,όσ̌ου,όσο,Ζ,1.0
809,όταν,όταν,Ζ,1.0
810,ότσι,ότι,Ζ,1.0


## Save to main Excel

In [8]:
# Save main_df back to the Excel file
# NOTE: this overwrites the workbook the dictionary was loaded from with the
# de-duplicated, re-sorted table. index = False keeps the row index out of the file.
main_df.to_excel(main_df_path, index = False)
print('Changes consolidated.')

Changes consolidated.


## Merge paradigms table

In [9]:
# Read paradigms.xlsx
# Load the paradigm lookup table; it is joined onto the dictionary through the
# 'paradigm' column in the merge step that follows.
paradigms_df_path = '../data/tables/paradigms.xlsx'
paradigms_df = pd.read_excel(paradigms_df_path)
# Preview the first rows only
paradigms_df.head()

Unnamed: 0,paradigm,notes,gen_sing,plural
0,Α0,"ο, πλ. ανόμαλος",,
1,Α1,"ο, πλ. -οι",,-οι
2,Α2,"ο, πλ. -ε",,-ε
3,Α3,"ο, πλ. -ου",,-ου
4,Α4,"ο, πλ. -ουνε, γεν. -ου",-ου,-ουνε


In [10]:
# Attach the paradigm details to every dictionary entry (left join on 'paradigm',
# so entries without a paradigm are kept with empty detail columns).
# The 'notes' check stops a second run of this cell from merging the table again.
if 'notes' not in main_df.columns:
    main_df = pd.merge(main_df, paradigms_df, how='left', on='paradigm')

main_df.head()

Unnamed: 0,tsakonian,greek,paradigm,source_id,notes,gen_sing,plural
0,άγιε,εκκλησία,Α4,,"ο, πλ. -ουνε, γεν. -ου",-ου,-ουνε
1,άγο,άλογο,Α0,1.0,"ο, πλ. ανόμαλος",,
2,άγουστε,αύγουστος,,1.0,,,
3,άζ̌α,"αραία, όχι συχνά",,1.0,,,
4,άθρωπο,άνθροπος,Α6,1.0,"ο, πλ. -ου, γεν. -ου",-ου,-ου


In [16]:
# Copy the full dictionary into the database
# The merged frame names the plural column 'plural', while the table definition
# below calls it 'plur'. Rename it on the way in so the INSERT ... SELECT that
# rebuilds the table can find a 'plur' column in the temporary copy.
main_df.rename(columns={'plural': 'plur'}).to_sql(
    'dictionary_entry',
    conn,
    if_exists='replace',
    index=False,
    dtype={'tsakonian': 'varchar(50)',
           'greek': 'varchar(200)',
           'paradigm': 'varchar(5)',
           'source_id': 'bigint',
           'notes': 'varchar(30)',   # the comma here was missing (SyntaxError)
           'gen_sing': 'varchar(10)',
           'plur': 'varchar(10)',
           })

# Delete the temporary table if an interrupted earlier run left it behind.
# IF EXISTS makes this a no-op when there is nothing to drop, so no exception
# has to be swallowed.
query("DROP TABLE IF EXISTS sqlitestudio_temp_table;")

# Add primary keys and foreign keys by recreating the table
# Obtained from SQLiteStudio
# NOTE(review): the INSERT below opens an implicit transaction under sqlite3's
# default isolation level, and SQLite documents PRAGMA foreign_keys as a no-op
# inside a transaction - so the final PRAGMA may not take effect on this
# connection. Confirm if foreign-key enforcement matters here.
recreating_query = """PRAGMA foreign_keys = 0;

CREATE TABLE sqlitestudio_temp_table AS SELECT *
                                          FROM dictionary_entry;

DROP TABLE dictionary_entry;

CREATE TABLE dictionary_entry (
    id        INTEGER       PRIMARY KEY AUTOINCREMENT,
    tsakonian VARCHAR (50)  NOT NULL,
    greek     VARCHAR (200),
    paradigm  VARCHAR (5),
    source_id    INTEGER       REFERENCES dictionary_source (source_id),
    notes     VARCHAR (30),
    gen_sing  VARCHAR (10),
    plur      VARCHAR (10)

);

INSERT INTO dictionary_entry (
                                 tsakonian,
                                 greek,
                                 paradigm,
                                 source_id,
                                 notes,
                                 gen_sing,
                                 plur

                             )
                             SELECT tsakonian,
                                    greek,
                                    paradigm,
                                    source_id,
                                    notes,
                                    gen_sing,
                                    plur
                               FROM sqlitestudio_temp_table;

DROP TABLE sqlitestudio_temp_table;

PRAGMA foreign_keys = 1;"""

# Execute queries in a loop
# Only one query can be executed at a time, so the script is split on ';'
# (safe here because no statement contains a literal semicolon). The empty
# fragment after the final ';' is skipped.
for q in recreating_query.split(';'):
    if q.strip():
        query(q)

# Save changes
conn.commit()

Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.


# Update sources table

In [13]:
# Read sources.xlsx
# Load the table of bibliographic sources; it is written to the
# 'dictionary_source' database table in the next step.
sources_df_path = '../data/tables/sources.xlsx'
sources_df = pd.read_excel(sources_df_path)
# Preview the first rows only
sources_df.head()

Unnamed: 0,source_id,title,url
0,1,"Καμβύσης, Ιωάννης (2020). Για να κ̇οντούμε τα ...",
1,2,"Δέφνερ, Μιχαήλ (1923). Λεξικόν της Τσακώνικης ...",
2,3,"Warr, John. Tsakoniandialect.info",http://www.tsakoniandialect.info/
3,4,"Μάνου, Ελένη (2015). Το πρώκιου μι Τσακώνικο λ...",
4,5,"Κωστάκης, Θανάσης (1986). Λεξικό της τσακώνικη...",


In [15]:
# Copy the full sources table into the database
# The dtype map names the columns sources_df actually has (source_id, title,
# url), with the same lengths as the table definition below. The previous keys
# (id, source, author, year, notes) matched none of the frame's columns.
sources_df.to_sql('dictionary_source',
                  conn,
                  if_exists='replace',
                  index=False,
                  dtype={'source_id': 'bigint',
                         'title': 'varchar(300)',
                         'url': 'varchar(200)',
                         })

# Delete the temporary table if an interrupted earlier run left it behind.
# IF EXISTS makes this a no-op when there is nothing to drop, so no exception
# has to be swallowed.
query("DROP TABLE IF EXISTS sqlitestudio_temp_table;")

# Add primary keys and foreign keys by recreating the table
# Obtained from SQLiteStudio
# NOTE(review): the INSERT below opens an implicit transaction under sqlite3's
# default isolation level, and SQLite documents PRAGMA foreign_keys as a no-op
# inside a transaction - so the final PRAGMA may not take effect on this
# connection. Confirm if foreign-key enforcement matters here.
recreating_query = """PRAGMA foreign_keys = 0;

CREATE TABLE sqlitestudio_temp_table AS SELECT *
                                            FROM dictionary_source;

DROP TABLE dictionary_source;

CREATE TABLE dictionary_source (
    source_id   INTEGER       PRIMARY KEY AUTOINCREMENT,
    title       VARCHAR (300),
    url         VARCHAR (200)
);

INSERT INTO dictionary_source (
                                    source_id,
                                    title,
                                    url
                                )
                                SELECT source_id,
                                         title,
                                         url
                                    FROM sqlitestudio_temp_table;

DROP TABLE sqlitestudio_temp_table;

PRAGMA foreign_keys = 1;"""

# Execute queries in a loop
# Only one query can be executed at a time, so the script is split on ';'
# (safe here because no statement contains a literal semicolon). The empty
# fragment after the final ';' is skipped.
for q in recreating_query.split(';'):
    if q.strip():
        query(q)

# Save changes
conn.commit()

Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
Query executed successfully. No results to show.
