# Data Visualisation in Python

## Import Necessary Libraries

In [None]:
#! pip install plotly

In [None]:
#! pip install folium

In [None]:
# for data 
import pandas as pd
# for scientific computation
import numpy as np

In [None]:
# for visualisation
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.io as pio

In [None]:
import folium

## Data Preparation

### Read Data

In [None]:
# load the comma-separated meteorite landings file into a DataFrame
df = pd.read_csv(
    "../../Data/Meteorite_Landings_20240212.csv",
    sep=',',
)

In [None]:
# (number of rows, number of columns) of the loaded data
df.shape

In [None]:
# look at 10 randomly picked rows to get an idea of the data
df.sample(10)

### Analytical Exploration

In [None]:
# get an idea of the columns, their non-null counts and their types
df.info()

In [None]:
# summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
# print floats with three fixed decimals instead of scientific notation (e.g. e+04)
pd.set_option('display.float_format', lambda value: f'{value:.3f}')

In [None]:
# NaN has no integer representation, so astype(int) raises while the column
# still holds missing years; drop those rows first, then convert
df.dropna(subset=['year'], inplace=True)
df['year'] = df['year'].astype(int)

In [None]:
# list the column (attribute) labels
df.columns.tolist()

In [None]:
# count the missing values in every column
df.isna().sum()

In [None]:
# delete rows with unknown year; inplace=True modifies df itself
# instead of returning a new frame
df.dropna(subset=['year'], inplace=True)

In [None]:
# replace the missing mass with the average mass (truncated to a whole number)
mass_mean = df['mass (g)'].mean()
# assign the filled column back instead of calling fillna(inplace=True) on the
# column selection: the chained in-place form may operate on a temporary
# object, is deprecated in pandas 2.x, and never updates df under Copy-on-Write
df['mass (g)'] = df['mass (g)'].fillna(int(mass_mean))

In [None]:
# number of observations for each value of 'nametype'
nametype_group = df.groupby('nametype').size()
nametype_group

In [None]:
# keep only the rows whose nametype is 'Relict'
dfr = df.loc[df['nametype'].eq('Relict')]

In [None]:
# summary statistics of the 'Relict' observations only
dfr.describe()

In [None]:
# keep only the rows whose nametype is 'Valid'
dfv = df.loc[df['nametype'].eq('Valid')]

In [None]:
# summary statistics of the 'Valid' observations only
dfv.describe()

In [None]:
# distinct values of 'recclass', in order of first appearance
df['recclass'].unique().tolist()

In [None]:
# number of observations for each value of 'recclass'
recclass_group = df.groupby('recclass').size()
recclass_group

### Visual Exploration

In [None]:
# create one histogram per numeric column to visualize the value distributions
df.hist()

In [None]:
# scatter mass against year for every observation to spot outliers;
# the points are coloured by year as well
df.plot(
    kind='scatter',
    x='year',
    y='mass (g)',
    c='year',
    colormap='viridis',
)

In [None]:
# z-score filter of outliers: keep only the rows whose value lies within
# 3.5 standard deviations of the column mean, first for year, then for mass
from scipy import stats

dfny = df[np.abs(stats.zscore(df['year'])) < 3.5]
# the mass z-scores must be computed on dfny, the frame actually being
# filtered: a mask built from df does not line up with dfny once the year
# filter has removed rows.
# .copy() makes dfnm an independent frame, so later in-place edits of dfnm
# do not act on a slice of dfny
dfnm = dfny[np.abs(stats.zscore(dfny['mass (g)'])) < 3.5].copy()
dfnm.plot.scatter(x='year', y='mass (g)', c='year', colormap='viridis')

In [None]:
# scatter mass against year and against recclass, using the outlier-filtered data
sns.pairplot(
    data=dfnm,
    x_vars=['year', 'recclass'],
    y_vars='mass (g)',
    height=5,
    aspect=0.8,
)

In [None]:
# box-plot of the mass, one box per nametype
ax = df.boxplot(column='mass (g)', by='nametype', figsize=(4, 4))
ax.set(xlabel='Class', ylabel='Mass (g)')

# blank out the figure super-title and the axes title
plt.suptitle('')
plt.title('')

plt.tight_layout()
plt.show()

In [None]:
%matplotlib inline
nametype_group.plot.bar()

### Interactive 3D Visualisation with Plotly

In [None]:
# minimal interactive line chart through (1, 1), (2, 2), (3, 3)
fig = px.line(
    x=[1, 2, 3],
    y=[1, 2, 3],
)
fig.show()

In [None]:
# interactive 3D line through six sample points
fig = px.line_3d(
    x=[1, 2, 3, 4, 5, 6],
    y=[6, 5, 4, 3, 2, 1],
    z=[1, 2, 3, 1, 2, 3],
)
fig.show()

In [None]:
# 3D scatter plot of the filtered data: year x mass x recclass,
# coloured by year and sized by mass
fig = px.scatter_3d(
    dfnm,
    x="year",
    y="mass (g)",
    z='recclass',
    color="year",
    size='mass (g)',
    size_max=40,
    opacity=0.8,
)
fig.show()

In [None]:
# bar chart of mass per year, coloured by year
fig = px.bar(
    dfnm,
    x="year",
    y="mass (g)",
    color='year',
)
fig.show()

In [None]:
# pie chart: the first inner list holds the slice values, the second the slice names
x = [
    [1, 2, 3, 4, 5],
    ['a', 'b', 'c', 'd', 'e'],
]
fig = px.pie(x, values=x[0], names=x[1])
fig.show()

In [None]:
import plotly.graph_objects as go
import numpy as np

# 30 x 30 grid over [-2, 2] x [-2, 2]:
# x changes from row to row, y from column to column
x, y = np.meshgrid(
    np.linspace(-2, 2, 30),
    np.linspace(-2, 2, 30),
    indexing='ij',
)
z = np.cos(x ** 2 + y ** 2)

# interactive 3D surface of z over the grid
fig = go.Figure(data=[go.Surface(x=x, y=y, z=z)])
fig.show()

### Visualise Geodata with Folium

In [None]:
# (rows, columns) of the outlier-filtered data
dfnm.shape

In [None]:
# count the missing values per column in the filtered data
dfnm.isna().sum()

In [None]:
# drop the observations without coordinates
subset = ['reclat', 'reclong']
# rebind the name instead of using dropna(inplace=True): dfnm was created by
# filtering another frame, and in-place edits of such a result raise
# SettingWithCopyWarning
dfnm = dfnm.dropna(subset=subset)

In [None]:
# (rows, columns) again, to see how many rows remain
dfnm.shape

In [None]:
# look at 10 randomly picked rows of the filtered data
dfnm.sample(10)

In [None]:
# build a map centred on latitude 55.6819, longitude 12.5627
# center = folium.Marker([55.6819, 12.5627], popup="Cphbusiness-Lyngby", tooltip="new building")
fmap = folium.Map(
    location=[55.6819, 12.5627],
    zoom_start=3,
)

In [None]:
# overlay the countries from a remote GeoJSON file
political_countries_url = "http://geojson.xyz/naturalearth-3.3.0/ne_50m_admin_0_countries.geojson"
countries_layer = folium.GeoJson(political_countries_url)
countries_layer.add_to(fmap)

In [None]:
# display the map with the country overlay
fmap

In [None]:
# add a marker for each of the first 100 rows; the popup shows the row's year
for _, row in dfnm.iloc[:100].iterrows():
    # build the marker directly: no placeholder assignment or
    # intermediate names are needed
    folium.Marker(
        location=(row['reclat'], row['reclong']),
        popup=str(int(row['year'])),
        tooltip="Click me!",
        icon=folium.Icon(color="green", icon='flag'),
    ).add_to(fmap)

In [None]:
# display the map with the markers
fmap

In [None]:
# attach a LatLngPopup element to the map; add_child's return value is the cell output
fmap.add_child(folium.LatLngPopup())

In [None]:
# add an 'openstreetmap' tile layer to the map
osm_layer = folium.TileLayer(tiles='openstreetmap')
osm_layer.add_to(fmap)

In [None]:
# add a layer control to the map
folium.LayerControl().add_to(fmap)

In [None]:
# write the map to an HTML file
fmap.save("../data/meteorit.html")

In [None]:
# display the final map
fmap

## See More
- https://plotly.com
- https://darigak.medium.com/your-guide-to-folium-markers-b9324fc7d65d
- https://deparkes.co.uk/2016/06/10/folium-map-tiles/

### Try various formatting parameters