In [253]:
import pandas as pd
import gspread
from df2gspread import df2gspread as d2g
from oauth2client.service_account import ServiceAccountCredentials
import json
import re

In [375]:


# Russian month header -> two-digit month number used to build date-like column names.
_MONTH_NUMBERS = {
    'Январь': '01', 'Февраль': '02', 'Март': '03', 'Апрель': '04', 'Май': '05',
    'Июнь': '06', 'Июль': '07', 'Август': '08', 'Сентябрь': '09', 'Октябрь': '10',
    'Ноябрь': '11', 'Декабрь': '12',
}


def _normalize_columns(frame, year):
    """Rename the sheet headers of `frame` to year-qualified names and return it."""
    fixed_names = {
        'Среднее за год': f'{year}_AVG',
        'ГОД': f'{year}_year',
        'Наименование': 'name',
    }

    def _rename(header):
        # Month headers become '<year>-<MM>-01'; unknown headers pass through unchanged.
        if header in _MONTH_NUMBERS:
            return f'{year}-{_MONTH_NUMBERS[header]}-01'
        return fixed_names.get(header, header)

    frame.columns = [_rename(header) for header in frame.columns]
    return frame


def _split_aggregates(frame, year):
    """Split `frame` into (per-month columns, yearly total/average columns)."""
    aggregate_columns = [f'{year}_year', f'{year}_AVG']
    # Empty cells are replaced with the string '0' in both resulting frames.
    aggregates = frame.loc[:, ['name'] + aggregate_columns].replace('', '0')
    monthly = frame.drop(aggregate_columns, axis=1).replace('', '0')
    return monthly, aggregates


def get_data(work_sheet, num: int):
    """Load one worksheet and split it into cost and income frames.

    Parameters
    ----------
    work_sheet
        Object exposing ``get_worksheet(num)``; the returned worksheet must
        provide a ``title`` attribute and ``get_all_values()``.
    num : int
        Index passed to ``work_sheet.get_worksheet``.

    Returns
    -------
    tuple
        ``(costs_df, costs_sum_avg_df, incomes_df, incomes_sum_avg_df)``.
        The first and third frames hold ``name`` plus the per-month columns
        (renamed to ``'<year>-<MM>-01'``); the second and fourth hold ``name``,
        ``'<year>_year'`` and ``'<year>_AVG'``. Cell values stay strings, with
        empty cells replaced by ``'0'``.

    Raises
    ------
    ValueError
        If the worksheet title contains no ``20xx`` year.
    KeyError
        If the expected header columns are missing from the sheet.
    """
    # Fetch the worksheet once and reuse it for both the title and the values.
    sheet = work_sheet.get_worksheet(num)

    year_match = re.search(r'(20)+(\d{2})', sheet.title)
    if year_match is None:
        raise ValueError(f'no 20xx year found in worksheet title {sheet.title!r}')
    year = year_match.group(0)

    data = sheet.get_all_values()
    headers = data.pop(0)

    # Cost rows sit between the 'ВСЕГО' row (exclusive) and the first later
    # row whose first cell contains 'Сумма' (exclusive). If a marker is
    # missing its bound stays 0, exactly as before.
    costs_start = 0
    costs_end = 0
    for index, row in enumerate(data):
        if row[0] == 'ВСЕГО':
            costs_start = index + 1
        elif 'Сумма' in row[0]:
            costs_end = index
            break

    # Incomes are the single row whose first cell contains the marker text;
    # row 0 is used when no such row exists.
    incomes_row = next(
        (index for index, row in enumerate(data)
         if 'доходы/расходы рабочие' in row[0]),
        0,
    )

    costs_df = _normalize_columns(
        pd.DataFrame(data[costs_start:costs_end], columns=headers), year)
    incomes_df = _normalize_columns(
        pd.DataFrame(data[incomes_row:incomes_row + 1], columns=headers), year)

    costs_df, costs_sum_avg_df = _split_aggregates(costs_df, year)
    incomes_df, incomes_sum_avg_df = _split_aggregates(incomes_df, year)

    return costs_df, costs_sum_avg_df, incomes_df, incomes_sum_avg_df

        

In [376]:
'1. Подготовка'
# Step 1 - preparation: path to the service-account key file, the parsed key
# contents, the API scopes to request and the address used later for sharing.
json_key = 'APIs/gspread-0d53a14d8aa7.json'
with open(json_key, 'r') as key_file:
    contents = json.load(key_file)
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
my_mail = 'dima.greensfan@gmail.com'

In [256]:
'2. Авторизация'
# Step 2 - authorization: build service-account credentials from the JSON key
# file for the requested scopes, then create an authorized gspread client.
credentials = ServiceAccountCredentials.from_json_keyfile_name(json_key, scope)
gs = gspread.authorize(credentials)

In [327]:
'3. Загрузка таблицы из Spreadsheet'
# Step 3 - open the source spreadsheet by its title and list its worksheets.
# NOTE: despite its name, `work_sheet` holds whatever `gs.open` returns (the
# object that is later asked for `.worksheets()` / `.get_worksheet()`).
table_name = 'Сводная таблица расходов и доходов'
work_sheet = gs.open(table_name)
worksheets_list = work_sheet.worksheets()
# Remove the last worksheet from the list so it is not processed later; as the
# final expression of the cell, the removed worksheet is also what is displayed.
worksheets_list.pop()

<Worksheet 'Расходы 2019_сводник' id:102313575>

In [404]:
'4. Формируем итоговые датафреймы'
# Step 4 - build the final frames: load every worksheet listed in
# `worksheets_list` (from the last index down to 0), outer-merge the per-sheet
# frames on `name`, then parse all amount cells to integers.

# Raw string on purpose: in a plain literal `\s` is an invalid escape sequence.
# The character class strips whitespace, the letter 'р' and dots from a cell.
_AMOUNT_NOISE = re.compile(r'[\sр.]')


def _amounts_to_int(frame):
    """Return a copy of `frame` with every column except `name` parsed to int."""
    converted = frame.copy()
    for col in converted.columns.drop('name'):
        converted[col] = converted[col].apply(
            lambda cell: int(_AMOUNT_NOISE.sub('', cell)))
    return converted


if not worksheets_list:
    raise ValueError('worksheets_list is empty: there are no worksheets to load')

# `merged` holds the four accumulated frames in the order returned by get_data:
# costs, costs totals/averages, incomes, incomes totals/averages.
merged = None
for i in reversed(range(len(worksheets_list))):
    df_list = get_data(work_sheet, i)
    if merged is None:
        # The first processed sheet seeds the accumulators as-is.
        merged = list(df_list)
    else:
        # Rows or columns missing on either side of the outer merge become '0'.
        merged = [
            accumulated.merge(current, on='name', how='outer').fillna('0')
            for accumulated, current in zip(merged, df_list)
        ]

costs_df, sum_avg_costs_df, incomes_df, sum_avg_incomes_df = (
    _amounts_to_int(frame) for frame in merged
)

In [405]:
'5. Транспонируем датафреймы'
# Step 5 - make `name` the index of each frame and transpose it, so the former
# column labels become the row index and the names become the columns.
costs_df, sum_avg_costs_df, incomes_df, sum_avg_incomes_df = (
    frame.set_index('name').T
    for frame in (costs_df, sum_avg_costs_df, incomes_df, sum_avg_incomes_df)
)

### Перенос df в Google Spreadsheets

**I. Если таблица еще не создана**

In [380]:
'6. Создаем новую таблицу в Google Spreadsheets'
# Step 6 (when the target does not exist yet) - create the destination
# spreadsheet under the given title.
new_table_name = 'incomes_costs_data'
sheet = gs.create(new_table_name)

In [381]:
'7. Делаем таблицу видимой'
# Step 7 - share the newly created spreadsheet with `my_mail` as a user with
# the writer role; the call's return value is the cell output.
sheet.share(my_mail, perm_type='user', role='writer')

<Response [200]>

In [382]:
# Upload each prepared frame to its own worksheet of the new spreadsheet,
# writing the frame index as row names.
for sheet_name, frame in (
    ('costs', costs_df),
    ('sum_avg_costs', sum_avg_costs_df),
    ('incomes', incomes_df),
    ('sum_avg_incomes', sum_avg_incomes_df),
):
    uploaded = d2g.upload(frame, new_table_name, sheet_name,
                          credentials=credentials, row_names=True)

# Show the result of the last upload as the cell output.
uploaded

<Worksheet 'sum_avg_incomes' id:65774203>

**II. Если таблица уже создана**

In [406]:
'6. Открываем таблицу в Google Spreadsheets'
# Step 6 (when the target already exists) - open the destination spreadsheet
# by title. NOTE: this rebinds `table_name`, which an earlier cell used for the
# source spreadsheet's title.
table_name = 'incomes_costs_data'
sheet = gs.open(table_name)

In [391]:
# Upload the cost frames to their worksheets of the existing spreadsheet,
# writing the frame index as row names.
for sheet_name, frame in (
    ('costs', costs_df),
    ('sum_avg_costs', sum_avg_costs_df),
):
    uploaded = d2g.upload(frame, table_name, sheet_name,
                          credentials=credentials, row_names=True)

# Show the result of the last upload as the cell output.
uploaded

<Worksheet 'sum_avg_costs' id:1689862491>

In [407]:
# Upload the income frames to their worksheets of the existing spreadsheet,
# writing the frame index as row names.
for sheet_name, frame in (
    ('incomes', incomes_df),
    ('sum_avg_incomes', sum_avg_incomes_df),
):
    uploaded = d2g.upload(frame, table_name, sheet_name,
                          credentials=credentials, row_names=True)

# Show the result of the last upload as the cell output.
uploaded

<Worksheet 'sum_avg_incomes' id:65774203>