# Деление на стопы и поиск пробелов

In [None]:
import os
import sqlite3
from cltk.prosody.latin.hexameter_scanner import HexameterScanner
from cltk.prosody.latin.scansion_formatter import ScansionFormatter
# Shared hexameter scanner instance; scanfeet() calls scanner.scan() on each verse.
scanner = HexameterScanner()

In [None]:
def givefiles(path):
    """Return the names of the files located directly inside *path*.

    Only the top level of the directory is inspected; files that live in
    sub-directories are not listed.

    Args:
        path: Directory to list.

    Returns:
        A list of file names (without the directory part). An empty list is
        returned when *path* cannot be walked (for example, it does not
        exist), so callers can always iterate over the result.
    """
    # os.walk() yields nothing for a missing/unreadable directory. The
    # previous loop then fell through and returned None, which made
    # ``for name in givefiles(...)`` fail with a TypeError.
    _, _, files = next(os.walk(path), (path, [], []))
    return files

In [None]:
def readfiles(path, file):
    """Read a UTF-8 text file and return its content split into lines.

    Args:
        path: Directory that contains the file.
        file: Name of the file inside *path*.

    Returns:
        The list obtained by splitting the whole file content on '\\n'
        (a trailing newline therefore produces a final empty string).
    """
    full_name = os.path.join(path, file)
    with open(full_name, 'r', encoding='utf-8') as handle:
        content = handle.read()
    return content.split('\n')

In [None]:
def delmacrons(word):
    """Return *word* with macrons removed and 'w'/'j' rewritten as 'u'/'i'.

    Only the lower-case letters listed in the table below are affected;
    every other character is left as it is.
    """
    substitutions = (
        ('ā', 'a'),
        ('ī', 'i'),
        ('ū', 'u'),
        ('ē', 'e'),
        ('ō', 'o'),
        ('w', 'u'),
        ('j', 'i'),
    )
    for marked, plain in substitutions:
        word = word.replace(marked, plain)
    return word

In [None]:
def schemesyllabs(scheme):
    """Convert a letter scheme of feet into cumulative syllable borders.

    Counts syllables for a scheme produced by the "first method": each 'D'
    contributes three syllables, each 'S' or 'T' contributes two, and any
    other character is ignored.

    Args:
        scheme: String of foot letters, e.g. 'DDDSSS'.

    Returns:
        The running syllable totals after each recognised foot, with the
        total of the last foot left out.
    """
    syllables_per_foot = {'D': 3, 'S': 2, 'T': 2}
    running_total = 0
    totals = []
    for letter in scheme:
        size = syllables_per_foot.get(letter)
        if size is None:
            continue
        running_total += size
        totals.append(running_total)
    # The border after the final foot is the end of the line, so drop it.
    return totals[:-1]

In [None]:
def caesur(wordborders, feetborders, spaces):
    """Label the caesuras of a verse from its word and foot borders.

    Each word border is compared with positions derived from the first four
    foot borders: one syllable past the 1st, 2nd and 3rd foot border, and
    exactly on the 4th foot border.

    Args:
        wordborders: Syllable indices of word boundaries, as built by
            scanfeet().
        feetborders: Cumulative syllable counts at the foot boundaries; the
            first four entries are consulted.
        spaces: The verse text. Its number of ' ' characters is compared with
            the number of word borders to detect an unreliable word split.

    Returns:
        A list of caesura labels in the order of the matching word borders,
        or exactly ["Разделы"] when a word border matches none of the
        positions and the number of word borders differs from the number of
        spaces in *spaces*.

    Raises:
        IndexError: If a needed entry of *feetborders* is missing.
    """
    # This comparison does not depend on the loop variable, so do it once.
    # len(s.split(' ')) - 1 is the number of ' ' characters in s.
    borders_mismatch = len(wordborders) != spaces.count(' ')

    caesurus = []
    for border in wordborders:
        if border == feetborders[0] + 1:
            caesurus.append('трёхполовинная')
        elif border == feetborders[1] + 1:
            caesurus.append('пятиполовинная')
        elif border == feetborders[2] + 1:
            caesurus.append('семиполовинная')
        elif border == feetborders[3]:
            caesurus.append('буколическая')
        elif borders_mismatch:
            # Word borders are not equal to the spaces. Return the sentinel
            # immediately: previously it was assigned here and later
            # iterations could append labels to it, so callers comparing the
            # result with ["Разделы"] no longer recognised it.
            return ["Разделы"]

    return caesurus

In [None]:
def scanfeet(text, letscheme=''):
    """Scan one verse and derive foot borders, word borders and caesuras.

    Lines starting with '#' are not scanned; placeholder values are returned
    for them instead.

    Args:
        text: One line of the input file (a verse, or a '#' line).
        letscheme: Optional foot scheme in letters (the format consumed by
            schemesyllabs()). When non-empty, a second set of foot borders
            and caesuras is computed from it.

    Returns:
        A 9-tuple
        (listscan, scheme, feetborders, wordborders, caesurus,
        feetbordersM, caesurusM, err, syl) where

        * listscan is the scan result as a list of 8 items. NOTE(review):
          presumably original, scansion, meter, valid, syllable_count,
          accented, scansion_notes, syllables (the column order of the
          cltk_scan table) -- confirm against the installed cltk version.
        * scheme is the output of ScansionFormatter().hexameter().
        * feetborders / wordborders / caesurus come from the cltk scan;
          caesurus is ["ошибка"] when listscan[3] is not True.
        * feetbordersM / caesurusM come from *letscheme* ('' when it is
          empty).
        * err is 1 when both caesura results are equal, 0 when there is no
          letscheme result and the cltk result is a sentinel (["ошибка"] or
          ["Разделы"]), 'alatius' when only the cltk result is a sentinel,
          'cltk' when there is no letscheme result but the cltk result is
          not a sentinel, and 2 otherwise.
        * syl is 1 when feetbordersM[-1] + 2 equals listscan[4], 0 when it
          does not, and 2 when the comparison cannot be made.
    """
    if not text.startswith('#'):
        # Run the module-level scanner and index the result positionally.
        listscan = list(scanner.scan(text.strip(' ')))
        listscan[1] = listscan[1].replace(' ','')
        # NOTE(review): this value is never read; `feet` is reset to 0 below.
        feet = listscan[1].replace(' ','')

        scheme = ScansionFormatter().hexameter(listscan[1])
        if not '|' in scheme:
            # No '|' separators in the formatted scheme: if the pattern ends
            # in '-U', replace the final character with '-' and format again.
            if scheme.endswith('-U'):
                scheme = scheme[:-1] + '-'
                scheme = ScansionFormatter().hexameter(scheme)

        #print(scheme)
        # For every '|' record how many non-'|' characters precede it.
        feetborders = []
        feet = 0
        for i in scheme:
            if i == '|':
                feetborders.append(feet)
            else:
                feet += 1

        wordborders = []
        sylstr = ''
        # Prefer listscan[5] when it is non-empty, otherwise use listscan[0].
        if listscan[5] != '':
            words = listscan[5]
        else:
            words = listscan[0]

        # Both the text and the syllables go through delmacrons() so that
        # they can be compared with startswith() below.
        words = delmacrons(words)
        relist = delmacrons(listscan[7])
        syllabs = relist.split(', ')

        # Glue syllables together until the remaining text no longer starts
        # with them: that syllable index is recorded as a word border, the
        # first word is dropped from the text, and gluing restarts from the
        # current syllable.
        for number, syl in enumerate(syllabs):
            sylstr = sylstr + syl

            if words.startswith(sylstr) == False:
                #print(sylstr, words)
                wordborders.append(number)
                words = words.split(" ", 1)[-1]
                sylstr = syl


        #print(wordborders)
        #print(feetborders)
        # Caesuras from the cltk scan are only computed when listscan[3] is True.
        if listscan[3] == True:
            caesurus = caesur(wordborders, feetborders, listscan[0])
        else:
            caesurus = ["ошибка"]

        # Second opinion: caesuras derived from the supplied letter scheme.
        if letscheme:
            feetbordersM = schemesyllabs(letscheme)
            caesurusM = caesur(wordborders, feetbordersM, listscan[0])
        else:
            feetbordersM = ''
            caesurusM = ''

    else:
        # '#' line: keep the text in position 0 and fill the rest with blanks.
        listscan = [text, '', '', '', '', '', '', '']
        scheme = ''
        feetborders = ''
        wordborders = ''
        caesurus = '#'
        feetbordersM = ''
        caesurusM = ''

    # Classify how the two caesura results relate (see the docstring).
    if caesurus == caesurusM:
        err = 1
    elif caesurusM == '' and (caesurus == ["ошибка"] or caesurus == ["Разделы"]):
        err = 0
    elif caesurusM != '' and (caesurus == ["ошибка"] or caesurus == ["Разделы"]):
        err = 'alatius'
    elif caesurusM == '' and (caesurus != ["ошибка"] and caesurus != ["Разделы"]):
        err = 'cltk'
    else:
        err = 2

    # Compare listscan[4] with the total implied by the letter scheme:
    # schemesyllabs() drops the last foot, hence the + 2.
    if listscan[4]:
        if feetbordersM:
            if feetbordersM[-1] + 2 == listscan[4]:
                syl = 1
            else:
                syl = 0
        else:
            syl = 2
    else:
        syl = 2

    return(listscan, scheme, feetborders, wordborders, caesurus, feetbordersM, caesurusM, err, syl)

In [None]:
#ScansionFormatter().hexameter('-----------U')

In [None]:
#scanfeet('ferre pedēs haesit cōnāta āvertere vultūs')

In [None]:
#scanfeet('impulerit. Tantaenet animis caelestibus irae?', 'DDDSSS')

In [None]:
#'quīd fremat īmperiūm quīd raucōs tōrqueat aestūs'.startswith('quid')

In [None]:
#print(scanfeet('quid fremat imperium quid raucōs torqueat aestūs '))

In [None]:
# Create the results table (if it does not exist yet) in latin_scan22.db.
# The connection is closed in a `finally` clause so that it is released even
# when the statement or the commit fails.
conn = sqlite3.connect('latin_scan22.db')
try:
    cur = conn.cursor()
    cur.execute("""
CREATE TABLE IF NOT EXISTS cltk_scan 
(id INTEGER PRIMARY KEY AUTOINCREMENT, file text, original text, scansion text, meter text, 
valid text, syllable_count int, accented text, scansion_notes text, syllables text, 
scheme text, feetborders text, wordborders text, caesurus text, feetbordersM text, caesurusM text, 
error text, countsyl text)
""")

    conn.commit()
finally:
    conn.close()

In [None]:
def basefirst(path, filename):
    """Scan every verse of one file and store the results in cltk_scan.

    The last line of the file must contain the per-verse foot schemes between
    the markers 'FEETSTART' and 'FEETEND', separated by ';'. Every preceding
    line is passed to scanfeet() together with the scheme at the same
    position, and one row per line is inserted into latin_scan22.db.

    Args:
        path: Directory that contains the file.
        filename: Name of the file; also stored in the `file` column.

    Raises:
        IndexError: If the last line lacks the 'FEETSTART' marker or lists
            fewer schemes than there are verse lines.
    """
    text = readfiles(path, filename)
    steps = text[-1].split('FEETSTART')[1].split('FEETEND')[0].split(';')

    # One connection per file instead of one per verse; `finally` guarantees
    # that it is closed even if scanning or inserting a verse fails.
    conn = sqlite3.connect('latin_scan22.db')
    try:
        cur = conn.cursor()
        for i, verse in enumerate(text[:-1]):
            allverse = scanfeet(verse, steps[i])
            # Row layout: file name, the 8 scan fields, the scheme as is,
            # then the 7 remaining result fields converted to text.
            tobase = [filename, *allverse[0], allverse[1]]
            tobase.extend(str(field) for field in allverse[2:9])
            cur.execute('INSERT INTO cltk_scan VALUES (null, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', tobase)
            # Commit per verse, as before, so rows written before a failure
            # are kept.
            conn.commit()
    finally:
        conn.close()

In [None]:
# Process every file of the corpus directory: print its name, then scan it
# and store the results.
source_dir = 'C:/Users/M/Desktop/latmacr/'
for filename in givefiles(source_dir):
    print(filename)
    basefirst(source_dir, filename)