In [None]:
import csv
import time
from datetime import date

import pandas as pd
from pythonopensubtitles.opensubtitles import OpenSubtitles
from pythonopensubtitles.utils import File
from tqdm.notebook import tqdm

In [None]:
def get_subtitles_by_id(imdbid, output_directory='/home/eva/Diploma/os/'):
    """Download the first English subtitle found for one IMDb id.

    Uses the module-level ``ost`` client (it must already be logged in).
    The first search hit is downloaded into ``output_directory`` and saved
    as ``<imdbid>.srt``.

    Parameters
    ----------
    imdbid : int or str
        Numeric IMDb id passed to the search as ``imdbid``.
    output_directory : str, optional
        Directory handed to ``ost.download_subtitles``.

    Returns
    -------
    tuple
        ``(moviename, first_sub_data, downloaded)`` where ``first_sub_data``
        is the first search result and ``downloaded`` is whatever
        ``ost.download_subtitles`` returned.  If the download returned
        ``None`` the tuple is ``('NONNEE', 'NONNE', 'NONNE')``; if any
        ordinary exception occurred (including an empty or ``None`` search
        result) it is ``('ERROR', 'ERROR', 'ERROR')``.
    """
    # The search itself is intentionally left outside the try block, as in
    # the original: a failure here propagates to the caller.
    data = ost.search_subtitles([{'sublanguageid': 'eng', 'imdbid': imdbid}])

    try:
        # Raises TypeError/IndexError when the search returned None or [],
        # which is reported through the ERROR sentinel below.
        first_sub_data = data[0]
        id_subtitle_file = first_sub_data.get('IDSubtitleFile')
        moviename = first_sub_data.get('MovieName')
        downloaded = ost.download_subtitles(
            [id_subtitle_file],
            output_directory=output_directory,
            override_filenames={id_subtitle_file: f'{imdbid}.srt'})

        if downloaded is None:
            print(f'Subs for {moviename}, imdb_id {imdbid} not dloaded')
            # Sentinel spellings (including the uneven 'NONNEE') are kept
            # as-is so new rows match rows already written by earlier runs.
            moviename, first_sub_data, downloaded = 'NONNEE', 'NONNE', 'NONNE'

    except Exception as exc:
        # Only ordinary errors are swallowed; KeyboardInterrupt/SystemExit
        # still propagate so a long batch can be stopped by the user.
        print(f'While dloading subs for imdb_id {imdbid} I got error: {exc!r}')
        moviename, first_sub_data, downloaded = 'ERROR', 'ERROR', 'ERROR'

    return moviename, first_sub_data, downloaded

In [None]:
def get_subtitles_by_list(imdbidlist, pause_seconds=2):
    """Download subtitles for every IMDb id in ``imdbidlist`` and log results.

    For each id, ``get_subtitles_by_id`` is called and its result is
    (a) appended immediately to a timestamped checkpoint file
    ``fil_sub_download_temp_file_<YYYY_MM_DD>_<HH_MM_SS>.csv`` so progress
    survives an interrupted run, and (b) collected in memory.  After the
    loop the collected rows are written to ``fil_sub_download.csv``
    (``;``-separated, indexed by IMDb id).

    Parameters
    ----------
    imdbidlist : iterable
        IMDb ids to process, in order.
    pause_seconds : float, optional
        Seconds to sleep after each id (default 2, as before).

    Returns
    -------
    bool
        Always ``True``.
    """
    run_stamp = date.today().strftime('%Y_%m_%d') + '_' + \
        time.strftime('%H_%M_%S', time.localtime())
    temporal_txt_path = 'fil_sub_download_temp_file_' + run_stamp + '.csv'

    # Collect plain records and build the DataFrame once at the end instead
    # of re-concatenating a growing frame on every iteration.
    row_ids = []
    records = []

    for imdbid in tqdm(imdbidlist):

        moviename, first_sub_data, downloaded = get_subtitles_by_id(imdbid)
        row_ids.append(imdbid)
        records.append({'moviename': moviename,
                        'data': str(first_sub_data),
                        'file': str(downloaded)})

        # csv.writer quotes any field that contains ';', '"' or a newline, so
        # a movie title or metadata repr can no longer break the column
        # layout when this file is read back with sep=';'.
        with open(temporal_txt_path, 'a', newline='', encoding='utf-8') as temp_file:
            csv.writer(temp_file, delimiter=';', lineterminator='\n').writerow(
                [str(imdbid), str(moviename), str(first_sub_data), str(downloaded)])

        time.sleep(pause_seconds)

    df = pd.DataFrame(records, index=row_ids, columns=['moviename', 'data', 'file'])
    df.to_csv('fil_sub_download.csv', sep=';')

    return True

In [None]:
# Log in to OpenSubtitles. `ost` is the module-level client that
# get_subtitles_by_id() reads as a global.
ost = OpenSubtitles()
# NOTE(review): eval() executes whatever the credentials file contains.
# If the file is a plain dict literal, ast.literal_eval (or JSON) would be a
# safer parser - confirm the file format before switching.
with open('/home/eva/Diploma/moviesdataset/passwords_1.txt') as cred_file:
    cred = eval(cred_file.read())
# Alternative account:
# cred = eval(open('/home/eva/Diploma/moviesdataset/passwords_2.txt').read())
ost.login(cred['login'], cred['password'])

# Ids already attempted in earlier runs (one row per attempt, de-duplicated);
# these are excluded from the batch below.
df_sub_skip = pd.read_csv('fil_sub_download_temp_file.csv', sep=';', names=['imdb_id','moviename', 'data','file'])
df_sub_skip = df_sub_skip.astype({'imdb_id': 'int32','moviename': 'string','data': 'object','file': 'object'})
df_sub_skip = df_sub_skip.drop_duplicates(subset='imdb_id')
df_sub_skip = df_sub_skip.reset_index(drop=True)

# Candidate movies: strip the 'tt' marker so imdb_id can be compared as an
# integer with the skip list.
df_to_sub = pd.read_csv('df_to_sub.csv', sep=';', index_col='id')
df_to_sub['imdb_id'] = df_to_sub['imdb_id'].str.replace('tt', '', regex=False)
df_to_sub = df_to_sub.astype({'keywords': 'object','title': 'string','imdb_id': 'int32','keywords_len': 'int32'})
df_to_sub = df_to_sub[~df_to_sub.imdb_id.isin(df_sub_skip.imdb_id)]
df_to_sub = df_to_sub.sort_values(by='id')

# Number of not-yet-attempted movies to process in this run.
BATCH_SIZE = 200
imdbidlist = df_to_sub.iloc[0:BATCH_SIZE,:].imdb_id
# Single-id smoke test:
# imdbidlist = ['80339']

print(f'Subs to load: {len(imdbidlist)}')

get_subtitles_by_list(imdbidlist)