In [None]:
import numpy as np
import pandas as pd
import seaborn as sn
import plotly.express as px
import matplotlib.pyplot as plt

In [None]:
# Load the raw parser export; the path is relative to the notebook's working directory.
csv_path = 'parserData (1).csv'
df = pd.read_csv(csv_path)
# Preview the first rows as the cell output.
df.head()

In [None]:
# Replace the CSV header with descriptive column names (assigned by position).
df.columns = [
    'Date',
    'Time',
    'Lat',
    'Long',
    'Dust Density',
    'Dust Density Accuracy',
    'Ice',
    'Ice Accuracy',
]
df.head()

In [None]:
# Summary statistics for every column; include='all' asks describe() to cover all columns.
df.describe(include='all')

In [None]:
# Print the frame's structural summary (output of DataFrame.info()).
df.info()

In [None]:
# Pairwise correlation heatmap of the numeric columns.
# The frame also holds the string-valued 'Date' and 'Time' columns; on
# pandas >= 2.0 DataFrame.corr() no longer drops such columns silently and
# raises instead, so restrict the input to numeric dtypes explicitly.
numeric_columns = df.select_dtypes(include='number')
cor = numeric_columns.corr()
sn.heatmap(cor, annot=True)
plt.show()
# Author's observation: dust density is correlated with latitude.

In [None]:
# Histogram of the dust density readings.
dust_density = df['Dust Density']
dust_density.plot.hist()
plt.grid()
plt.title('Dust Density')
plt.show()
# Author's observation: densities are distributed around 0-0.001.

In [None]:
# Map of dust density by position; marker area encodes the density.
# scatter() interprets `s` as marker area in points**2, so passing the raw
# densities (the author's histogram note puts them around 0-0.001) draws
# markers far too small to see. Rescale so the largest reading gets
# MAX_MARKER_AREA and the rest are proportional to it.
MAX_MARKER_AREA = 200  # points**2 assigned to the largest reading

dust_density = df['Dust Density']
peak_density = dust_density.max()
if peak_density > 0:
    marker_area = dust_density / peak_density * MAX_MARKER_AREA
else:
    # No positive peak (all zero, all negative or all NaN): nothing sensible
    # to scale by, so fall back to the raw values.
    marker_area = dust_density

plt.figure(figsize=(14,8))
plt.scatter(x=df.Long,
            y=df.Lat,
            s=marker_area,
            color='red',
            alpha=0.5)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid()
plt.title('Mars Dust Density')
plt.show()

In [None]:
# Map of ice readings by position; the raw 'Ice' value is passed as the marker area.
ice_marker_style = dict(s=df['Ice'], color='red', alpha=0.5)

plt.figure(figsize=(14,8))
plt.scatter(x=df.Long, y=df.Lat, **ice_marker_style)
plt.title('Mars Ice')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid()
plt.show()

In [None]:
# Interactive map of every reading: colour and marker size both encode dust density.
fig = px.scatter(
    df,
    x='Long',
    y='Lat',
    color='Dust Density',
    size='Dust Density',
    opacity=0.5,
)
layout_labels = dict(
    title='Mars Dust Density',
    xaxis_title='Longitude',
    yaxis_title='Latitude',
)
fig.update_layout(**layout_labels)
fig.show()

In [None]:
# Rows whose 'Date' text is 01-Apr-2015.
apr1 = df.loc[df['Date'] == '01-Apr-2015']

In [None]:
# Interactive dust density map for the `apr1` subset only.
scatter_encoding = dict(x='Long', y='Lat', color='Dust Density', size='Dust Density')
fig = px.scatter(apr1, opacity=0.5, **scatter_encoding)
fig.update_layout(
    title='Mars Dust Density',
    xaxis_title='Longitude',
    yaxis_title='Latitude',
)
fig.show()

In [None]:
# Rows whose 'Date' text is 04-Apr-2015 (note: the variable is named apr2, the date is the 4th).
is_apr04 = df['Date'] == '04-Apr-2015'
apr2 = df[is_apr04]
apr2

In [None]:
# Interactive dust density map for the `apr2` subset only.
fig = px.scatter(
    apr2,
    x='Long',
    y='Lat',
    color='Dust Density',
    size='Dust Density',
    opacity=0.5,
)
fig.update_layout(dict(
    title='Mars Dust Density',
    xaxis_title='Longitude',
    yaxis_title='Latitude',
))
fig.show()

In [None]:
# Rows whose 'Date' text is 05-Apr-2015 (note: the variable is named apr3, the date is the 5th).
is_apr05 = df['Date'] == '05-Apr-2015'
apr3 = df[is_apr05]
apr3

In [None]:
# Interactive dust density map for the `apr3` subset only.
axis_titles = {'xaxis_title': 'Longitude', 'yaxis_title': 'Latitude'}
fig = px.scatter(
    apr3,
    x='Long', y='Lat',
    color='Dust Density', size='Dust Density',
    opacity=0.5,
)
fig.update_layout(title='Mars Dust Density', **axis_titles)
fig.show()

In [None]:
# Interactive map of the `apr1` subset coloured by the 'Ice' reading (uniform marker size).
fig = px.scatter(
    apr1,
    x='Long',
    y='Lat',
    color='Ice',
    opacity=0.5,
)
ice_layout = dict(
    title='Mars Water Ice',
    xaxis_title='Longitude',
    yaxis_title='Latitude',
)
fig.update_layout(**ice_layout)
fig.show()

In [None]:
# Display the frame in its current state.
df

In [None]:
from datetime import datetime

# Rewrite the 'Date' column as ISO 'YYYY-MM-DD' strings (parsed, then re-formatted as text).
# NOTE(review): this overwrites 'Date' in place, so filters written against the
# original date text (e.g. '01-Apr-2015') match nothing if re-run after this cell.
parsed_dates = pd.to_datetime(df['Date'])
df['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
df

In [None]:
# Rows whose 'Date' string is 2015-04-25.
is_day25 = df['Date'] == '2015-04-25'
day25 = df[is_day25]
print(day25)

In [None]:
# Rows whose 'Date' string is 2015-04-30.
is_day30 = df['Date'] == '2015-04-30'
day30 = df[is_day30]
print(day30)

In [None]:
# Work on an independent (deep) copy so the next steps do not add columns to `df`.
df2 = df.copy(deep=True)

In [None]:
# Build one label per reading by concatenating the date and time strings
# directly (no separator between them).
date_time_labels = df2['Date'] + df2['Time']
df2['Date_Time'] = date_time_labels
df2.head()  # not the last expression of the cell, so nothing is displayed for it
print(df2['Date_Time'])

In [None]:
# Drop every listed column and index what remains by the combined date-time label.
unused_columns = ['Date','Time','Lat','Long','Dust Density Accuracy','Ice','Ice Accuracy']
dust = df2.drop(columns=unused_columns).set_index(['Date_Time'])
dust

In [None]:
# Line plot of the whole date-time-indexed dust density series.
plt.figure(figsize=(14, 8))
plt.ylabel('Dust Density')
plt.xlabel('Date_Time')
plt.xticks(rotation=90)
plt.plot(dust)
# Author's note: too many points to read at this scale.

In [None]:
# Keep only the final days of April 2015. The bounds are labels of the
# concatenated 'Date'+'Time' index: 25 April 00:00:25.617 to 30 April 23:59:43.891.
window_start = '2015-04-2500:00:25.617'
window_end = '2015-04-3023:59:43.891'
sample = dust.loc[window_start:window_end]
sample

In [None]:
# Line plot of the sampled window only.
plt.figure(figsize=(14, 8))
plt.ylabel('Dust Density')
plt.xlabel('Date_Time')
plt.xticks(rotation=90)
plt.plot(sample)

In [None]:
from statsmodels.tsa.stattools import adfuller 

In [None]:
# Augmented Dickey-Fuller stationarity test on the sampled dust densities.
print('Results of Dickey Fuller Test:')
dftest = adfuller(sample['Dust Density'], autolag='AIC')

# Label the first four entries of the result, then append each entry of the
# critical-value mapping (element 4) to the same Series.
summary_labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
dfoutput = pd.Series(dftest[0:4], index=summary_labels)
for level, threshold in dftest[4].items():
    dfoutput['Critical Value (%s)'%level] = threshold

print(dfoutput)

# Author's conclusion: the series is stationary (no unit root detected).

# Autoregressive Model

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pandas.plotting import autocorrelation_plot
import statsmodels.api as sm
# The legacy statsmodels.tsa.arima_model.ARIMA was deprecated in statsmodels
# 0.12 and removed in 0.13; its replacement lives in statsmodels.tsa.arima.model.
# Prefer the new location and fall back to the old one on older installations.
try:
    from statsmodels.tsa.arima.model import ARIMA
except ImportError:
    from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Autocorrelation of the sampled dust densities across all lags.
sampled_density = sample['Dust Density']
autocorrelation_plot(sampled_density)
plt.show()

In [None]:
# ACF (top panel) and PACF (bottom panel) of the sampled dust densities, 40 lags each.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12,8))
sampled_density = sample['Dust Density']
fig = sm.graphics.tsa.plot_acf(sampled_density, lags=40, ax=ax1)
fig = sm.graphics.tsa.plot_pacf(sampled_density, lags=40, ax=ax2)

# Author's reading of the plots:
#   MA order from the ACF: 3 or 4
#   AR order from the PACF: 1
#   no differencing

In [None]:
# Model 1: ARIMA fitted to the sampled dust densities.
arima_order = (1, 0, 2)  # (AR order, differencing, MA order)
model = ARIMA(endog=sample['Dust Density'], order=arima_order)
model_fit = model.fit()

In [None]:
# Rebuild `dust` from `df`: drop every listed column and index what remains by 'Date'.
# NOTE(review): this reuses the name `dust`, replacing the date-time-indexed frame built earlier.
unused_columns = ['Time','Lat','Long','Dust Density Accuracy','Ice','Ice Accuracy']
dust = df.drop(columns=unused_columns).set_index(['Date'])
dust

In [None]:
# Line plot of dust density against the 'Date' index.
plt.ylabel('Dust Density')
plt.xlabel('Date')
plt.xticks(rotation=90)
plt.plot(dust)

In [None]:
# Rolling mean and standard deviation over a window of 365 consecutive rows.
rolling_365 = dust.rolling(window=365)
rolmean = rolling_365.mean()
rolstd = rolling_365.std()
print(rolmean, rolstd)

In [None]:
# Overlay the original series with its rolling mean and rolling standard deviation.
orig = plt.plot(dust, label='Original', color='blue')
mean = plt.plot(rolmean, label='Rolling mean', color='red')
std = plt.plot(rolstd, label='Rolling std', color='black')
plt.title('Rolling Mean & Standard Deviation')
plt.legend(loc='best')
plt.xticks(rotation=90)

plt.show(block=False)

In [None]:
# Copy of `df` without the listed columns, kept as ordinary columns (no index change).
unused_columns = ['Time','Lat','Long','Dust Density Accuracy','Ice','Ice Accuracy']
newdust = df.drop(columns=unused_columns)
newdust


In [None]:
# Mean dust density per value of the index level named 'Date'.
# The explicit axis=0 was dropped: it is the default, and passing `axis` to
# pd.Grouper is deprecated in recent pandas releases.
dust_ave = dust.groupby(pd.Grouper(level='Date')).mean()
dust_ave

In [None]:
# Plot the per-date mean dust density.
# `dust_ave` is a groupby mean per date, not a moving average, so the title
# says "Daily Mean". plt.show() ends the cell so the Text repr is not echoed.
plt.xlabel('Date')
plt.ylabel('Dust Density')
plt.xticks(rotation=90)
plt.plot(dust_ave)
plt.title('Daily Mean of Dust Density')
plt.show()

In [None]:
# Overlay the original readings with the per-date mean.
# `dust_ave` is a groupby mean per date, not a rolling/moving average, so the
# legend label and title name it accordingly.
orig = plt.plot(dust, color='blue', label='Original')
ave = plt.plot(dust_ave, color='red', label='Daily Mean')
plt.xlabel('Date')
plt.ylabel('Dust Density')
plt.legend(loc='best')
plt.title('Dust Density Daily Mean & Original Dust Density')
# Rotate the tick labels once, after both series are drawn (the original
# issued this call twice; only the final one determines the result).
plt.xticks(rotation=90)

plt.show(block=False)

In [None]:
# Natural log of the date-indexed dust densities, plotted as a line.
# NOTE(review): zero or negative densities would yield -inf/NaN here — confirm the data range.
dust_log = np.log(dust)
plt.xticks(rotation=90)
plt.plot(dust_log)

In [None]:
# 30-row rolling mean and standard deviation of the log series; only the mean is plotted.
rolling_30 = dust_log.rolling(window=30)
movingAverage = rolling_30.mean()
movingSTD = rolling_30.std()

plt.plot(dust_log)
plt.xticks(rotation=90)
plt.plot(movingAverage, color='red')


In [None]:
# Concatenate the date and time strings (no separator) and store the result back in 'Date'.
# NOTE(review): 'Date' is overwritten in place, so running this cell a second
# time appends the time string again.
df.head()  # not the last expression of the cell, so nothing is displayed for it
date_time_labels = df['Date'] + df['Time']
df['Date'] = date_time_labels
print(df)

In [None]:
# Drop 'Time' together with the position, accuracy and ice columns in one step.
dropped_columns = ['Time','Lat','Long','Dust Density Accuracy','Ice','Ice Accuracy']
sample = df.drop(columns=dropped_columns)
# Show the row(s) whose combined label is 25 April 2015, 00:00:25.617.
sample[sample['Date'] == '2015-04-2500:00:25.617']

In [None]:
# Slice the `dust` frame by index label, from '2015-04-25' through '2015-04-30'.
first_day = '2015-04-25'
last_day = '2015-04-30'
sample = dust.loc[first_day:last_day]
sample

In [None]:
# Line plot of the date-sliced sample.
plt.ylabel('Dust Density')
plt.xlabel('Date')
plt.xticks(rotation=90)
plt.plot(sample)