In [34]:
import pandas as pd
import requests
import io
import re
import pymongo
import database as db

In [35]:
# "YYYY-Sww" tags extracted from the Content-Disposition header of every
# downloaded report; cereobsReport() appends to this list.
WEEK = []

# week.txt holds a single integer: the value sent to the API as
# `semaineObservation` for this run.
with open('week.txt', 'r') as week_file:
    SEMAINE = int(week_file.read().strip())

In [36]:
def cereobsReport(type, semaine, id_culture, stade_dev=None, timeout=30):
    """Download one report from the cereobs.franceagrimer.fr public API.

    Parameters
    ----------
    type : str
        'Condition' (requested with typePublication=5) or 'Developpement'
        (requested with typePublication=3). The name shadows the builtin but
        is kept so existing callers keep working.
    semaine : int
        Value sent as the ``semaineObservation`` query parameter.
    id_culture : int
        Value sent as the ``idCulture`` query parameter.
    stade_dev : int, optional
        Value sent as ``idStadeDev``; only used for 'Developpement' reports.
    timeout : float, optional
        Seconds to wait for the server; without it a stalled connection
        would block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        For 'Developpement': the parsed sheet with its first two columns
        renamed to 'Region' and 'Actual'.
        For 'Condition': the first six columns renamed to 'Region' plus the
        five condition classes, and only those six columns kept.
        An empty DataFrame when ``type`` is unknown, when the
        Content-Disposition header carries no ``YYYY-Sww`` tag, or when the
        response is not an Excel file. Every failure path returns a
        DataFrame so callers can always test ``.empty``.

    Side effects
    ------------
    Appends the ``YYYY-Sww`` tag found in the Content-Disposition header to
    the module-level ``WEEK`` list.
    """
    base_url = "https://cereobs.franceagrimer.fr/cereobs-sp/api/public/publications/rapportCereobs"
    if type == 'Condition':
        url = f"{base_url}?semaineObservation={semaine}&idCulture={id_culture}&typePublication=5"
    elif type == 'Developpement':
        url = f"{base_url}?semaineObservation={semaine}&idCulture={id_culture}&idStadeDev={stade_dev}&typePublication=3"
    else:
        print('Wrong type')
        return pd.DataFrame()

    r = requests.get(url, timeout=timeout)
    content_type = r.headers.get('Content-Type') or ''
    content_disposition = r.headers.get('Content-Disposition')
    if content_disposition:
        match = re.search(r'\d{4}-S\d{2}', content_disposition)
        if match is None:
            print('Something went wrong with Year and Week in file extraction')
            return pd.DataFrame()
        WEEK.append(match.group())

    # Compare the media type only, so a header carrying parameters
    # (e.g. "; charset=...") is still recognised as an Excel payload.
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type != 'application/vnd.ms-excel':
        return pd.DataFrame()

    df = pd.read_excel(io.BytesIO(r.content))
    if type == 'Developpement':
        df = df.rename(columns={df.columns[0]: 'Region', df.columns[1]: 'Actual'})
    else:
        # Only 'Condition' can reach this point: unknown types returned above.
        condition_columns = ['Region', 'Très mauvaises', 'Mauvaises', 'Assez bonnes', 'Bonnes', 'Très bonnes']
        df = df.rename(columns=dict(zip(df.columns[:6], condition_columns)))
        df = df[condition_columns]
    return df

In [37]:
def insert_db(dfFrance):
    """Insert the weekly rows into the ``dev_cond_france`` collection.

    The rows are inserted only when the collection already contains at least
    one document, ``dfFrance`` is not empty, and the 'Date' of its first row
    differs from the 'Date' of the most recent stored document. After a
    successful insert, ``SEMAINE + 1`` is written to week.txt.

    Parameters
    ----------
    dfFrance : pandas.DataFrame
        Rows to store; must contain a 'Date' column.

    Returns
    -------
    str
        A status message describing what happened.
    """
    collection = db.get_database()["dev_cond_france"]
    records = dfFrance.to_dict('records')
    newest_doc = collection.find_one(sort=[('Date', pymongo.DESCENDING)])

    # Empty collection: nothing to compare against, so nothing is inserted.
    if newest_doc is None:
        return 'Nothing in the database, add historical data first'

    if dfFrance.empty:
        return 'NO DATA TO IMPORT TODAY, EMPTY DATAFRAME'

    # Only the first row's date is checked against the latest stored document.
    date_changed = dfFrance['Date'].iloc[0] != newest_doc['Date']
    if not date_changed:
        return 'Moyenne France : Document non inséré, doublon date avec le dernier document en base.'

    outcome = str(collection.insert_many(records))
    outcome = 'Développement des cultures France : ' + outcome
    # Advance the stored week id so the next run requests the following week.
    with open('week.txt', 'w') as week_file:
        week_file.write(str(SEMAINE+1))
    return outcome

In [38]:
def monday_of_week(year, week):
    """Return the Monday that starts ISO week ``week`` of ``year``.

    Parameters
    ----------
    year : int
        ISO year.
    week : int
        ISO week number (1-based).

    Returns
    -------
    pandas.Timestamp
        Midnight on the Monday of the requested week.

    Notes
    -----
    ISO 8601 defines week 1 as the week containing 4 January. Anchoring on
    1 January instead yields a date one week too early whenever 1 January
    falls on a Friday, Saturday or Sunday (e.g. 2021, 2022, 2023, 2027).
    NOTE(review): assumes the 'YYYY-Sww' tags in the report file names use
    ISO week numbering - confirm against the data provider.
    """
    jan_4 = pd.Timestamp(year=int(year), month=1, day=4)
    # dayofweek is 0 for Monday, so this steps back to the Monday of week 1.
    week_one_monday = jan_4 - pd.Timedelta(days=jan_4.dayofweek)
    return week_one_monday + pd.Timedelta(weeks=int(week) - 1)

In [39]:
# Crop ids (passed to cereobsReport as `id_culture`) -> crop label.
cultureMap = {2: 'Blé tendre', 3: 'Blé dur', 5: 'Maïs grain'}

# Development-stage ids (passed to cereobsReport as `stade_dev`) -> stage label.
# Wheat stages use the consecutive ids 1..7.
bleMap = dict(enumerate(
    [
        'Semis',
        'Levée',
        'Début tallage',
        'Épi 1cm',
        'Deux noeuds',
        'Épiaison',
        'Récolte',
    ],
    start=1,
))

# Maize stages use the consecutive ids 8..13.
maisMap = dict(enumerate(
    [
        'Semis',
        'Levée',
        '6/8 feuilles visibles',
        'Floraison femelle',
        'Humidité du grain 50%',
        'Récolte',
    ],
    start=8,
))

In [40]:
# One record per crop: {'Culture': label, <stage label>: 'Moyenne France' value or None}.
devMap = []
for culture_id, culture_name in cultureMap.items():
    # Wheat crops share the wheat stage ids; everything else uses the maize ones.
    stage_map = bleMap if culture_name.startswith('Blé') else maisMap
    record = {'Culture': culture_name}
    for stage_id, stage_name in stage_map.items():
        report = cereobsReport('Developpement', SEMAINE, culture_id, stage_id)
        if report.empty:
            # No report available for this crop/stage.
            record[stage_name] = None
        else:
            # .item() requires exactly one 'Moyenne France' row and raises otherwise.
            record[stage_name] = report.loc[report['Region'] == 'Moyenne France', 'Actual'].item()
    devMap.append(record)

In [41]:
# One record per crop: {'Culture': label, <condition class>: national value or None}.
condMap = []
condition_classes = ['Très mauvaises', 'Mauvaises', 'Assez bonnes', 'Bonnes', 'Très bonnes']
for idx, da in cultureMap.items():
    # Request the same observation week as the development reports (SEMAINE),
    # not a fixed week id, so both report types describe the same week.
    tmp = cereobsReport('Condition', SEMAINE, idx)
    if not tmp.empty:
        # The national row is labelled 'Moyenne France (1)' in the condition report.
        data = tmp[tmp['Region'] == 'Moyenne France (1)'][condition_classes].to_dict(orient='records')[0]
    else:
        # No report available: keep the columns but leave every class empty.
        data = dict.fromkeys(condition_classes)
    condMap.append({'Culture': da} | data)

In [42]:
# Combine development stages and crop conditions into one row per crop.
devDf = pd.DataFrame(devMap)
condDf = pd.DataFrame(condMap)
df = pd.merge(devDf, condDf, on=['Culture'], how='inner')

# Every downloaded report must carry the same 'YYYY-Sww' tag. Stop here when
# that is not the case (or when nothing was downloaded) rather than deriving
# Year/Week/Date from an arbitrary tag and failing later on a missing column.
distinct_weeks = set(WEEK)
if len(distinct_weeks) != 1:
    print('Non-unique weeks in data, check download')
    raise ValueError(f"Expected exactly one report week, got {sorted(distinct_weeks)}")

week_tag = WEEK[0]
year_part, week_part = week_tag.split('-')
df['Semaine'] = week_tag
df['Year'] = int(year_part)
df['Week'] = int(week_part[1:])  # drop the leading 'S'
df['Date'] = df.apply(lambda row: monday_of_week(row['Year'], row['Week']), axis=1)

In [43]:
# Fixed column order for the stored rows.
column_order = [
    'Culture', 'Semaine',
    'Semis', 'Levée', '6/8 feuilles visibles', 'Floraison femelle', 'Humidité du grain 50%', 'Récolte',
    'Très mauvaises', 'Mauvaises', 'Assez bonnes', 'Bonnes', 'Très bonnes',
    'Week', 'Year', 'Date',
    'Début tallage', 'Épi 1cm', 'Deux noeuds', 'Épiaison',
]
df = df[column_order]

In [44]:
# Store this week's rows and show the status message returned by insert_db.
print(rFrance := insert_db(df))

Développement des cultures France : <pymongo.results.InsertManyResult object at 0x000002039B2C74F0>
