In [2]:
import pandas as pd

xls_file = pd.read_excel('srd2014\RUNDATA.xls')

df = pd.DataFrame(xls_file)
df

Unnamed: 0,Signum,Plats,Socken,Härad,Kommun,Placering,Koordinater,Urspr. plats?,Nuv. koord.,Sockenkod/Fornlämningsnr.,...,Period/Datering,Stilgruppering,Ristare,Materialtyp,Material,Föremål,Övrigt,Alternativt signum,Referens,Bildlänk
0,Öl 1 $,Karlevi,Vickleby sn,Algutsrums hd,Mörbylånga,,6275755.1538971,ja,,"0879 (Vickleby), 10 [objektid=10087900100001]",...,V s 900-t,RAK,,sten,smålandsporfyr,runsten,västnordisk inskrift,B 1071; L 1323; DR 411,$=ATA Dnr 323-3515-2002; $=Gustavson 2002; Str...,http://kmb.raa.se/cocoon/bild/object_list_simp...
1,Öl 2 †$,Algutsrums kyrka,Algutsrums sn,Algutsrums hd,Mörbylånga,,6283750.1544290,,,0785 (Algutsrum),...,V,Pr3,,sten,,runsten,,B 1076; L 1324,"$=Plansch IV fig. 2, 145ff.; BN 2",
2,Öl 3 †$,Resmo kyrka,Resmo sn,Algutsrums hd,Mörbylånga,,6268390.1539280,,,0858 (Resmo),...,V efter 1050,Pr3 - Pr4?,,sten,,fragment av runsten,Parsten till Öl 4. Runföljden -unilu har av Br...,"L 1588, L 1589",$=SRI 1 plansch IV fig. 3; BN 5,
3,Öl 4 $,Resmo kyrka,Resmo sn,Algutsrums hd,Mörbylånga,Kalmar läns museum (KLM 24962),6268390.1539280,nej,,0858 (Resmo),...,V efter 1050,Pr4,,sten,grå kalksten,runsten,Parsten till Öl 3.,"L 1588, L 1589",$=ATA Dnr 4707/43; BN 4,
4,Öl 5 †,Bårby,Mörbylånga sn,Algutsrums hd,Mörbylånga,,6264000.1538250,,,0849 (Mörbylånga),...,V,Pr3,,sten,,runsten,,L 1320,BN 10,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11666,L 2233 =N_170,,,,,,,,,,...,,,,,,,,,,
11667,L 2234 =DR_100A,,,,,,,,,,...,,,,,,,,,,
11668,L 2991 =U_664,,,,,,,,,,...,,,,,,,,,,
11669,L 2992 =U_Fv1955;222,,,,,,,,,,...,,,,,,,,,,


Example of filtering for a particular inscription

In [3]:
df[df['Signum'].str.contains('Ög 154')]

sigs = df['Signum'].values
sigs

array(['Öl 1 $ ', 'Öl 2 †$ ', 'Öl 3 †$ ', ..., 'L 2991 =U_664 ',
       'L 2992 =U_Fv1955;222 ', 'L 2994 =DR_284 '], dtype=object)

In order to create tables with rundata_ids, we have to separate the ID from the runic text in each of the RUNDATA files

In [33]:
def get_table_from_text(file_name):
    with open("srd2014\{}".format(file_name), "r", encoding='iso-8859-1') as f:
        lines = f.readlines()

    df = pd.DataFrame(columns=['Signum', 'Text'])

    for line in lines:
        for signum in sigs:
            if line.find(signum) != -1:
                parts = line.split(signum)
                df = df.append({'Signum': signum, 'Text': parts[1]}, ignore_index=True)

    return df
        
runtext_df = get_table_from_text('RUNTEXT')
english_df = get_table_from_text('ENGLISH')


In [5]:
runtext_df

Unnamed: 0,Signum,Text
0,Sm 1 $,Â§P [uih]i[k]utr : resti : sten (:) ef(t)r : r...
1,Sm 3 M?,... \n
2,Sm 4 $M,Ã· finviÃ¾ir hiog Ã¾Ã¦ta kar : a : diura:biÃ¦r...
3,Sm 5,Â§A : kotr : sati : sten : Ã¾ana : eftR : keti...
4,Sm 6 $M,bosi : tlhi Â¶ kirki Â¶ bosi : ta^lh`Ã¾Â´i ste...
...,...,...
9151,L 2233 =N_170,\n
9152,L 2234 =DR_100A,\n
9153,L 2991 =U_664,\n
9154,L 2992 =U_Fv1955;222,\n


In [38]:
import psycopg2

conn = psycopg2.connect(
    host="localhost",
    database="rundata",
    user="user",
    password="pass"
)

cur = conn.cursor()


def create_core_table():
    cur.execute("DROP TABLE IF EXISTS inscription")
    cur.execute("""
        CREATE TABLE inscription (
            rundata_id text primary key,
            coordinates text ,
            material text
    )
    """)

    for i, row in df.iterrows():
        cur.execute("INSERT INTO inscription (rundata_id, coordinates, material) VALUES(%s, %s, %s)", (row['Signum'], row['Koordinater'], row['Material']))
    conn.commit()

def create_rundata_table(name, df):
    cur.execute("DROP TABLE IF EXISTS rundata_{}".format(name))
    cur.execute("""
        CREATE TABLE rundata_{} (
            rundata_id text,
            text text
    )
    """.format(name))

    for i, row in df.iterrows():
        cur.execute("INSERT INTO rundata_{} (rundata_id, text) VALUES(%s, %s)".format(name), (row['Signum'], row['Text']))
    conn.commit()

create_core_table()
create_rundata_table('english', english_df)
create_rundata_table('runtext', runtext_df)

cur.close()
conn.close()