In [212]:
import pymongo 
from pprint import pprint
from bson.objectid import ObjectId
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime 


### Connect to Database

In [213]:
# Open a client on the local MongoDB server and select the project database.
client = pymongo.MongoClient('localhost:27017')
database_name = 'BDD'
db = client[database_name]
# List the available collections so the names used below can be checked.
pprint(db.list_collection_names())


['aggregate', 'earthquake', 'weather']


In [214]:
# Handles on the two collections analysed in this notebook.
earthquake_collection = db['earthquake']
weather_collection = db['weather']


### Analyzing earthquake data

In [215]:
# Get a handle on the earthquake collection.
# NOTE(review): the same assignment is already made in the previous cell, so this one is redundant.
earthquake_collection = db.get_collection(name='earthquake')

def count_documents(key: str, value: str, collection=None) -> int:
    """Count the documents whose field ``key`` equals ``value``.

    Args:
        key: Field name used as the filter key.
        value: Value the field must equal.
        collection: Collection to query. When omitted, the notebook-level
            ``earthquake_collection`` is used.

    Returns:
        The number of matching documents.
    """
    if collection is None:
        collection = earthquake_collection
    # Let the server do the counting instead of streaming every matching
    # document to the client just to increment a counter.
    return collection.count_documents({key: value})
# Count the documents whose 'type' field equals 'Feature' and print the total.
count_number = count_documents('type','Feature')
print(count_number)

9056


### Setting up the data needed

In [216]:
# Build the earthquake frame straight from an aggregation pipeline:
#   1. convert `properties.updated` into a date stored as `convertedDate`,
#   2. keep only the fields used in the rest of the notebook,
#   3. sort the documents by ascending `convertedDate`.
df = pd.DataFrame(
    earthquake_collection.aggregate(
        [
            {
                "$addFields": {
                    "convertedDate": {
                        "$convert": {"input": "$properties.updated", "to": "date"}
                    }
                }
            },
            {
                "$project": {
                    "properties.mag": 1,
                    "convertedDate": 1,
                    "properties.type": 1,
                    "geometry.coordinates": 1,
                    "properties.tsunami": 1,
                }
            },
            {"$sort": {"convertedDate": 1}},
        ]
    )
)


In [217]:

# Replace each geometry sub-document by its `coordinates` list.
# An explicit key lookup avoids building one pandas Series per row and does not
# depend on how `apply` handles a list of callables. Values that are not dicts
# are left untouched, so running the cell a second time is harmless.
df['geometry'] = df['geometry'].map(
    lambda geom: geom['coordinates'] if isinstance(geom, dict) else geom
)

In [218]:
# Split the coordinate lists into two scalar columns and drop the Mongo `_id`.
# NOTE(review): the first coordinate is stored under 'latitude' and the second
# under 'longitude'. The sample output shows values such as -122.8 in
# 'latitude', which is outside the valid latitude range, so the labels look
# swapped. The plotting cells pass `lat=df.longitude, lon=df.latitude`, which
# compensates; any rename must be done in both places at once.
lat_list = [coords[0] for coords in df['geometry']]
long_list = [coords[1] for coords in df['geometry']]
df['latitude'] = lat_list
df['longitude'] = long_list
df.drop(columns=['_id'], inplace=True)


In [219]:
# Flatten the `properties` sub-document into scalar columns.
# Each field is looked up by name rather than by its position in
# `dict.values()`, so the result no longer depends on the order in which the
# server returns the keys; a missing key yields None instead of shifting the
# other fields.
mag = [props.get('mag') for props in df['properties']]
type_earthquake = [props.get('type') for props in df['properties']]
tsunami = [props.get('tsunami') for props in df['properties']]

df['magnitude'] = mag
df['Type'] = type_earthquake
df['tsunami'] = tsunami
# The nested source columns are no longer needed once flattened.
df.drop(columns=['properties', 'geometry'], inplace=True)

In [220]:
# Preview the first six rows of the flattened earthquake frame.
df.head(6)

Unnamed: 0,convertedDate,latitude,longitude,magnitude,Type,tsunami
0,2022-12-21 00:23:25.582,-122.815331,38.847168,0.29,earthquake,0
1,2022-12-21 00:41:53.145,-116.29,37.0184,0.1,earthquake,0
2,2022-12-21 00:51:35.011,-122.805168,38.825832,0.67,earthquake,0
3,2022-12-21 00:56:55.390,-66.951833,17.943667,1.72,earthquake,0
4,2022-12-21 01:59:20.690,-66.960333,17.948667,2.03,earthquake,0
5,2022-12-21 02:00:14.831,-122.804665,38.824165,0.75,earthquake,0


### Exploratory Data Analysis - Earthquake

##### Distribution des catastrophes naturelles dans le monde entier

In [253]:
# World map of the recorded events, coloured by event type.
# The 'longitude' column is passed as `lat` and 'latitude' as `lon` on purpose:
# this mirrors the way the two columns were filled when the frame was built.
fig = px.scatter_geo(
    data_frame=df,
    lat='longitude',
    lon='latitude',
    color='Type',
    projection='orthographic',
    opacity=.5,
    title='La distrubition des catastrophes naturelles dans le monde entier ',
)
fig.update_traces(marker=dict(size=10))
fig.show()

##### Représentation des types de catastrophes naturelles présents dans les données

In [222]:
# Share of each event type in the data set.
# Slices are sized by the number of events per type. Weighting them by the
# summed magnitude (as before) mixes event frequency with event strength and
# does not show how the records are distributed across types.
type_counts = (
    df['Type']
    .value_counts()
    .rename_axis('Type')
    .reset_index(name='count')
)
fig_pie = px.pie(
    type_counts,
    values='count',
    names='Type',
    hole=.3,
    color_discrete_sequence=px.colors.sequential.RdBu,
)
fig_pie.show()

Notre analyse porte essentiellement sur les données de type « earthquake », qui représentent environ 99 % de l'ensemble.

##### Représentation de la densité

In [255]:
# Density map of the events, weighted by magnitude.
# As in the scatter map, 'longitude' feeds `lat` and 'latitude' feeds `lon`
# to match how the two columns were filled.
fig_mapbox = px.density_mapbox(
    df,
    lat='longitude',
    lon='latitude',
    z='magnitude',
    radius=8,
    center=dict(lat=10, lon=20),
    zoom=1,
    hover_data=['convertedDate'],
    mapbox_style='carto-positron',
    width=900,
    height=500,
)
fig_mapbox.show()

##### Combien de tsunamis ont été enregistrés au fil des années ?

In [232]:
# Number of events whose tsunami value is 1.
(df['tsunami'] == 1).sum()

9

##### Y a-t-il un changement au fil des années dans le taux de séismes ?

In [233]:
# Magnitude of every event over time; the tsunami value is shown on hover.
fig = px.line(
    df,
    x='convertedDate',
    y='magnitude',
    hover_data=['tsunami'],
    title="Evolution des séismes sur une durée de deux mois ",
)
fig.update_layout(xaxis_title="Date")
fig.show()

### Y a-t-il une corrélation entre la météo et les séismes ?

##### Import weather data 

In [234]:
# Load the weather documents, keeping the location and the forecast fields used
# below under shorter names.
# NOTE(review): the forecast fields are presumably nested arrays (one list of
# hourly values per forecast day) - confirm against the collection schema.
df_weather = pd.DataFrame(
    weather_collection.aggregate(
        [
            {
                "$project": {
                    "long": '$location.lon',
                    "lat": '$location.lat',
                    "time": "$forecast.forecastday.hour.time",
                    "wind_kph": '$forecast.forecastday.hour.wind_kph',
                    "pression": "$forecast.forecastday.hour.pressure_in",
                    "precip_mm": '$forecast.forecastday.hour.precip_mm',
                }
            }
        ]
    )
)
# The Mongo identifier is not used in the analysis.
df_weather = df_weather.drop(columns='_id')

##### Data cleaning
- Les NaN occupent des lignes entières --> la seule solution est de supprimer ces lignes

In [236]:
# Drop every row that contains at least one missing value.
df_weather =df_weather.dropna()

In [237]:
# Count the remaining missing values per column (all zeros expected after dropna).
df_weather.isna().sum()

long         0
lat          0
time         0
wind_kph     0
pression     0
precip_mm    0
dtype: int64

#### On crée un nouveau dataframe
- Afin d'avoir une bonne représentation de nos données, on suppose que la nouvelle représentation sera plus lisible

In [86]:
# Flatten the nested forecast arrays into one row per (location, hour).
# Only the first forecast day of each document is used, as before. The hourly
# values are walked with zip() instead of a fixed range(0, 24), so a day that
# carries fewer than 24 entries no longer raises an IndexError.
weather_columns = ['time', 'wind speed', 'pression', 'precip mm', 'latitude', 'longitude']
weather_records = []
for _, row in df_weather.iterrows():
    # Skip documents that carry no forecast day at all.
    if len(row['time']) == 0:
        continue
    first_day = zip(
        row['time'][0],
        row['wind_kph'][0],
        row['pression'][0],
        row['precip_mm'][0],
    )
    for hour_time, wind, pressure, precip in first_day:
        weather_records.append(
            {
                'time': hour_time,
                'wind speed': wind,
                'pression': pressure,
                'precip mm': precip,
                'latitude': row['lat'],
                'longitude': row['long'],
            }
        )

# Passing `columns` keeps the expected layout even when no record was produced.
df_weather_new = pd.DataFrame(weather_records, columns=weather_columns)

    

In [238]:
# Preview the first ten rows of the flattened weather frame.
df_weather_new.head(10)

Unnamed: 0,time,wind speed,pression,precip mm,latitude,longitude
0,2023-01-18 00:00,16.6,29.27,0.0,60.71,-151.24
1,2023-01-18 01:00,15.6,29.27,0.0,60.71,-151.24
2,2023-01-18 02:00,14.6,29.27,0.0,60.71,-151.24
3,2023-01-18 03:00,13.7,29.28,0.0,60.71,-151.24
4,2023-01-18 04:00,13.3,29.27,0.0,60.71,-151.24
5,2023-01-18 05:00,13.0,29.27,0.0,60.71,-151.24
6,2023-01-18 06:00,12.6,29.27,0.0,60.71,-151.24
7,2023-01-18 07:00,12.6,29.26,0.0,60.71,-151.24
8,2023-01-18 08:00,12.6,29.26,0.0,60.71,-151.24
9,2023-01-18 09:00,12.6,29.25,0.0,60.71,-151.24


- Une fonction qui prend 3 paramètres (latitude, longitude, date) et nous renvoie l'enregistrement météo recherché

In [242]:
def get(lat, long, date):
    """Return the first weather row matching a position and the hour of ``date``.

    ``date`` is converted to text, cut after its first 14 characters and
    completed with '00' before being compared with the 'time' column (this
    assumes the text starts with 'YYYY-MM-DD HH:').

    The position arguments are matched crosswise: ``lat`` is compared with the
    'longitude' column and ``long`` with the 'latitude' column.

    Returns:
        A DataFrame holding at most one row of ``df_weather_new``.
    """
    hour_stamp = f"{str(date)[:14]}00"
    same_hour = df_weather_new['time'] == hour_stamp
    same_position = (df_weather_new['latitude'] == long) & (
        df_weather_new['longitude'] == lat
    )
    return df_weather_new.loc[same_hour & same_position].head(1)

# Look up the weather row for one position and timestamp, then display it.
collection_recherché = get(-122.81,38.84,'2022-12-21 01:59:20.690')
collection_recherché.head()

Unnamed: 0,time,wind speed,pression,precip mm,latitude,longitude
206281,2022-12-21 01:00,2.3,30.28,0.0,38.84,-122.81


##### Méthode pour extraire les dates 

In [250]:
class extract:
    """Helpers for pulling date parts out of timestamp strings."""

    @staticmethod
    def dt(dateframe):
        """Return the calendar date of the first timestamp in ``dateframe``.

        Args:
            dateframe: Iterable of strings formatted like '2023-01-18 00:00'.

        Returns:
            'year-month-day' (without zero padding) for the first element, or
            None when ``dateframe`` is empty.

        Raises:
            ValueError: If the first element does not match '%Y-%m-%d %H:%M'.
        """
        for x in dateframe:
            # %H and %M each consume the two digits themselves; the former
            # '%HH:%MM' pattern additionally required literal 'H' and 'M'
            # characters and therefore rejected 'YYYY-MM-DD HH:MM' strings.
            actual_date = datetime.strptime(x, '%Y-%m-%d %H:%M')
            # NOTE(review): returning inside the loop means only the first
            # element is ever converted; kept so the return type is unchanged.
            return f'{actual_date.year}-{actual_date.month}-{actual_date.day}'
        return None

##### Graphique sur la pression du vent

In [254]:
# Pressure over the first 24 rows of the flattened weather frame.
fig = px.line(
    data_frame=df_weather_new[0:24],
    x='time',
    y='pression',
    title='La projection de la pression sur un seule jour ',
)
fig.update_layout(xaxis_title="Date", yaxis_title="Pression du vent")
fig.show()