# Database connection test

# Preparation

In [14]:
# Imports
import sqlite3
import pandas as pd

## Main connection

In [15]:
# Connect to main database
db_path = '../TsakonianDB.sqlite3'
conn = sqlite3.connect(db_path)

## Auxiliary functions

In [16]:
def query(query: str,
          cursor = conn.cursor()):

      cursor.execute(query)
      fetched = cursor.fetchall()

      if len(fetched) > 0:
            temp_df = pd.DataFrame(fetched, columns=[i[0] for i in cursor.description])
            return temp_df
      else:
            print('Query executed successfully. No results to show.')

# Add words from main dictionary to the database

## Load dictionary

In [17]:
# Load main dictionary
main_df_path = '../data/tables/main.xlsx'
main_df = pd.read_excel(main_df_path)
main_df

Unnamed: 0,tsakonian,greek,paradigm,source_id
0,άβατ̇ε,άκλαυτος,Ε,1.0
1,άγιε,εκκλησία,Α4,
2,άγο,άλογο,Α0,1.0
3,άγουστε,αύγουστος,,1.0
4,άζ̌α,"αραία, όχι συχνά",,1.0
...,...,...,...,...
936,όσ̌ου,όσο,Ζ,1.0
937,όταν,όταν,Ζ,1.0
938,ότσι,ότι,Ζ,1.0
939,ύο,νερό,Υ0,1.0


## Manage duplicates

In [18]:
# Remove duplicates with the exact same information
print(f'Entries before removing duplicates: {len(main_df)}')
main_df = main_df.drop_duplicates()
print(f'Entries after removing duplicates: {len(main_df)}')

Entries before removing duplicates: 941
Entries after removing duplicates: 940


In [19]:
# Show duplicates
main_df[main_df.duplicated(subset = 'tsakonian', keep = False)].sort_values(by = 'tsakonian')

Unnamed: 0,tsakonian,greek,paradigm,source_id
192,βου,κλαίω,Ρ,1.0
193,βου,βόδι,Α2,1.0
399,κηρούνι,πηρούνι,Υ3,1.0
400,κηρούνι,πηρούνι,,1.0
434,κουκί,κουτί,Υ,1.0
435,κουκί,κουπί,Υ,1.0
478,λεκό,λευκό,Ε,1.0
479,λεκό,λευκός,Ε,1.0
603,ο,ο,,1.0
604,ο,δεν,,


In [20]:
# Keep the Tsakonian-Greek duplicate with the lowest ID
print(f'Entries before removing duplicates: {len(main_df)}')
main_df = (main_df
           .sort_index()
           .drop_duplicates(subset = ['tsakonian', 'greek'], keep = 'first')
           .sort_values(by = 'tsakonian')
           .reset_index(drop = True)
           )
print(f'Entries after removing duplicates: {len(main_df)}')
main_df

Entries before removing duplicates: 940
Entries after removing duplicates: 940


Unnamed: 0,tsakonian,greek,paradigm,source_id
0,άβατ̇ε,άκλαυτος,Ε,1.0
1,άγιε,εκκλησία,Α4,
2,άγο,άλογο,Α0,1.0
3,άγουστε,αύγουστος,,1.0
4,άζ̌α,"αραία, όχι συχνά",,1.0
...,...,...,...,...
935,όσ̌ου,όσο,Ζ,1.0
936,όταν,όταν,Ζ,1.0
937,ότσι,ότι,Ζ,1.0
938,ύο,νερό,Υ0,1.0


## Save to main Excel

In [21]:
# Save main_df back to the Excel file
main_df.to_excel(main_df_path, index = False)
print('Changes consolidated.')

Changes consolidated.


## Merge paradigms table

In [22]:
# Read paradigms.xlsx
paradigms_df_path = '../data/tables/paradigms_nouns.xlsx'
paradigms_df = pd.read_excel(paradigms_df_path)
paradigms_df.tail()

Unnamed: 0,paradigm,notes,gen_sing,plural
33,π̇ιτόκαλε,"το, πλ. π̇ιτόκα",,π̇ιτόκα
34,κάλ̣ι,"το, πλ. κάβα",,κάβα
35,μάλ̣ι,"το, πλ. μάβα",,μάβα
36,άι,"το, πλ. άζα",,άζα
37,ύο,"το, πλ. ύβατα, γεν. υβάτου",υβάτου,ύβατα


In [23]:
# Merge paradigms_df with main_df
if 'notes' not in main_df.columns:
      # Regular nouns
      regular_mask = main_df['paradigm'].fillna('').str.contains('0') == False
      regular_df = main_df[regular_mask].copy()
      regular_df = regular_df.merge(paradigms_df, how = 'left', on = 'paradigm')

      # Irregular nouns
      irregular_mask = main_df['paradigm'].fillna('').str.contains('0')
      irregular_df = main_df[irregular_mask].copy()
      irregular_df = irregular_df.merge(paradigms_df.rename(columns = {'paradigm' : 'tsakonian'}), how = 'left', on = 'tsakonian')

      # Concatenate
      main_df = pd.concat([regular_df, irregular_df], axis = 0)
      main_df = main_df.sort_values(by = 'tsakonian')     

main_df.head()

Unnamed: 0,tsakonian,greek,paradigm,source_id,notes,gen_sing,plural
0,άβατ̇ε,άκλαυτος,Ε,1.0,,,
1,άγιε,εκκλησία,Α4,,"ο, πλ. -ουνε, γεν. -ου",-ου,-ουνε
0,άγο,άλογο,Α0,1.0,,,
2,άγουστε,αύγουστος,,1.0,,,
3,άζ̌α,"αραία, όχι συχνά",,1.0,,,


In [24]:
main_df.to_sql('dictionary_entry', 
               conn, 
               if_exists='replace', 
               index=False,
               dtype = {'tsakonian': 'varchar(50)',
                        'greek': 'varchar(200)',
                        'paradigm': 'varchar(5)',
                        'source_id': 'bigint',
                        'notes': 'varchar(30)',
                        'gen_sing' : 'varchar(10)',
                        'plur' : 'varchar(10)',
               })

940

In [None]:
# Full old code

In [None]:
# Copy the full dictionary into the database
# main_df.to_sql('dictionary_entry', 
#                conn, 
#                if_exists='replace', 
#                index=False,
#                dtype = {'tsakonian': 'varchar(50)',
#                         'greek': 'varchar(200)',
#                         'paradigm': 'varchar(5)',
#                         'source_id': 'bigint',
#                         'notes': 'varchar(30)',
#                         'gen_sing' : 'varchar(10)',
#                         'plur' : 'varchar(10)',
#                })

# # Delete the temporary table if it exists
# try:
#     query("DROP TABLE sqlitestudio_temp_table;")
# except:
#     pass

# # Add primary keys and foreign keys by recreating the table
# # Obtained from SQLiteStudio
# recreating_query = """PRAGMA foreign_keys = 0;

# CREATE TABLE sqlitestudio_temp_table AS SELECT *
#                                           FROM dictionary_entry;

# DROP TABLE dictionary_entry;

# CREATE TABLE dictionary_entry (
#     id        INTEGER       PRIMARY KEY AUTOINCREMENT,
#     tsakonian VARCHAR (50)  NOT NULL,
#     greek     VARCHAR (200),
#     paradigm  VARCHAR (5),
#     source_id    INTEGER       REFERENCES dictionary_source (source_id),
#     notes     VARCHAR (30),
#     gen_sing  VARCHAR (10),
#     plur      VARCHAR (10)

# );

# INSERT INTO dictionary_entry (
#                                  tsakonian,
#                                  greek,
#                                  paradigm,
#                                  source_id,
#                                  notes,
#                                  gen_sing,
#                                  plur

#                              )
#                              SELECT tsakonian,
#                                     greek,
#                                     paradigm,
#                                     source_id,
#                                     notes,
#                                     gen_sing,
#                                     plur
#                                FROM sqlitestudio_temp_table;

# DROP TABLE sqlitestudio_temp_table;

# PRAGMA foreign_keys = 1;"""

# # Execute queries in a loop
# # Only one query can be executed at a time
# for q in recreating_query.split(';'):
#       query(q)

# # Save changes
# conn.commit()

# Update sources table

In [None]:
# Read sources.xlsx
sources_df_path = '../data/tables/sources.xlsx'
sources_df = pd.read_excel(sources_df_path)
sources_df.head()

In [None]:
# Copy the sources into the database
sources_df.to_sql('dictionary_source', 
                  conn, 
                  if_exists='replace', 
                  index=False,
                  dtype = {'id': 'bigint',
                           'source': 'varchar(100)',
                           'author': 'varchar(100)',
                           'year': 'varchar(10)',
                           'notes': 'varchar(100)'
                  })

In [None]:
# Copy the full sources table into the database
sources_df.to_sql('dictionary_source', 
                  conn, 
                  if_exists='replace', 
                  index=False,
                  dtype = {'id': 'bigint',
                           'source': 'varchar(100)',
                           'author': 'varchar(100)',
                           'year': 'varchar(10)',
                           'notes': 'varchar(100)'
                  })

# Delete the temporary table if it exists
try:
    query("DROP TABLE sqlitestudio_temp_table;")
except:
    pass

# Add primary keys and foreign keys by recreating the table
# Obtained from SQLiteStudio
recreating_query = """PRAGMA foreign_keys = 0;

CREATE TABLE sqlitestudio_temp_table AS SELECT *
                                            FROM dictionary_source;

DROP TABLE dictionary_source;

CREATE TABLE dictionary_source (
    source_id   INTEGER       PRIMARY KEY AUTOINCREMENT,
    title       VARCHAR (300),
    url         VARCHAR (200)
);

INSERT INTO dictionary_source (
                                    source_id,
                                    title,
                                    url
                                )
                                SELECT source_id,
                                         title,
                                         url
                                    FROM sqlitestudio_temp_table;

DROP TABLE sqlitestudio_temp_table;

PRAGMA foreign_keys = 1;"""

# Execute queries in a loop
# Only one query can be executed at a time
for q in recreating_query.split(';'):
      query(q)

# Save changes
conn.commit()