# Sprint 8.2 - Power BI amb Python - Nivell 1

Els 7 exercicis del nivell 1 de la tasca 01

Antes de empezar a crear los scripts, en Power BI se hacen las transformaciones necesarias a las tablas (el cálculo de la edad en la tabla _users_ y la transformación del precio a formato float en la tabla _products_) y se crean las relaciones del modelo.

### Ex 1 - Una variable numèrica.

In [None]:
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script: 

# dataset = pandas.DataFrame(undefined)
# dataset = dataset.drop_duplicates()

# Paste or type your script code here:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ex 1 - histogram of the transaction amount (one numeric variable).
# `dataset` is the dataframe injected by Power BI before this script runs.

# Keep only the rows whose `declined` flag equals 0.
accepted = dataset[dataset['declined'] == 0]

# Ten-bin histogram of `amount`; seaborn draws on the current axes and
# returns them, so labels can be set directly on that object.
hist_ax = sns.histplot(data=accepted, x='amount', color='purple', bins=10)

hist_ax.set_title('Monto de transacciones (no rechazadas)', fontsize=12)
hist_ax.set_xlabel('Monto (€)')
hist_ax.set_ylabel('Recuento')

# Remove all padding so the plot fills the visual.
plt.tight_layout(pad=0, w_pad=0, h_pad=0)

plt.show()

### Ex 2 - Dues variables numèriques.

In [None]:
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script: 

# dataset = pandas.DataFrame(age, amount)
# dataset = dataset.drop_duplicates()

# Paste or type your script code here:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ex 2 - total amount spent per user age (two numeric variables).
# `dataset` is the dataframe injected by Power BI before this script runs.
df = dataset

# Sum `amount` for every age. The column is selected explicitly: the first
# positional parameter of GroupBy.sum is `numeric_only`, so the previous
# `sum('amount')` did not pick a column, it only passed a truthy flag.
# `as_index=False` keeps `age` as a regular column so it can be referenced
# by name in the plot without relying on index-name resolution.
# NOTE(review): the Power BI preamble drops duplicated rows; if the visual
# only receives `age` and `amount`, identical pairs are collapsed before
# they reach this sum - confirm whether a unique field should be added.
df = df.groupby('age', as_index=False)['amount'].sum()

sns.relplot(data=df, x='age', y='amount', color='purple')

plt.title('Monto gastado por edad de usuarios', fontsize=17)
plt.xlabel('Edad')
plt.ylabel('Monto (€)')
# Remove all padding so the plot fills the visual.
plt.tight_layout(pad=0, w_pad=0, h_pad=0)

plt.show()

### Ex 3 - Una variable categòrica.

In [None]:
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script: 

# dataset = pandas.DataFrame(declined)
# dataset = dataset.drop_duplicates()

# Paste or type your script code here:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ex 3 - share of accepted vs. declined transactions (one categorical variable).
# `dataset` is the dataframe injected by Power BI before this script runs.
df = dataset

# Count the rows per `declined` value and order the result by the flag itself
# (0 first) so the slice order does not depend on which outcome is more common.
# NOTE(review): the Power BI preamble drops duplicated rows; if `declined` is
# the only field passed to the visual, at most one row per value survives -
# confirm whether a unique field should be added to the visual.
declined_count = df['declined'].value_counts().sort_index()

# Derive label and colour from each flag value instead of assuming that
# value_counts() returns accepted first and that both outcomes are present.
labels = ['aceptadas' if flag == 0 else 'rechazadas' for flag in declined_count.index]
colors = ['indigo' if flag == 0 else 'violet' for flag in declined_count.index]
sizes = declined_count.values

plt.figure(figsize=(5, 4))
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=180, colors=colors)
plt.title('Porcentaje de transacciones\n aceptadas/rechazadas', fontsize=17)
plt.axis('equal')

# A white circle drawn over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.40, fc='white')
plt.gca().add_artist(centre_circle)

# Remove all padding so the plot fills the visual.
plt.tight_layout(pad=0, w_pad=0, h_pad=0)
plt.show()

### Ex 4 - Una variable categòrica i una numèrica.

In [None]:
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script: 

# dataset = pandas.DataFrame(id, declined, country, amount)
# dataset = dataset.drop_duplicates()

# Paste or type your script code here:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ex 4 - box plot of the amount per country (categorical + numeric variable).
# `dataset` is the dataframe injected by Power BI before this script runs.

# Keep only the rows whose `declined` flag equals 0.
accepted = dataset[dataset['declined'] == 0]

# Alphabetical list of the distinct countries, used as the x-axis order.
country_order = sorted(accepted['country'].unique())

# catplot is a figure-level function: it returns a grid object, not an axes.
grid = sns.catplot(
    data=accepted,
    x='country',
    y='amount',
    kind='box',
    order=country_order,
    color='violet',
)

# Vertical tick labels on the x axis.
grid.tick_params(axis='x', rotation=90)

plt.title('Ventas por países de las empresas', fontsize=17)
plt.xlabel(None)
plt.ylabel('Monto (€)')
# Remove all padding so the plot fills the visual.
plt.tight_layout(pad=0, w_pad=0, h_pad=0)

plt.show()

### Ex 5 - Dues variables categòriques.

In [None]:
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script: 

# dataset = pandas.DataFrame(id, id.1, country)
# dataset = dataset.drop_duplicates()

# Paste or type your script code here:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ex 5 - per-country count of the three most purchased products
# (two categorical variables).
# `dataset` is the dataframe injected by Power BI before this script runs.
df = dataset

# Distinct countries in alphabetical order. The x-axis order must not contain
# repeated entries, so the values are de-duplicated before sorting.
countries = sorted(df['country'].unique())

# The three most frequent values of `id.1` and the rows that reference them.
top3products = list(df['id.1'].value_counts().head(3).index)
df_top3prod = df[df['id.1'].isin(top3products)]

# Colour by product name when that column is part of the dataset; otherwise
# fall back to `id.1`, which is always present because it is used above.
# NOTE(review): the preamble lists only id, id.1 and country - confirm that
# `product_name` is really passed to the visual.
hue_column = 'product_name' if 'product_name' in df_top3prod.columns else 'id.1'

plt.figure(figsize=(16, 8))
ax = sns.countplot(data=df_top3prod, x='country', hue=hue_column, palette='magma', order=countries)

plt.title('Distribución por países\n de los tres productos más comprados', fontsize=17)
plt.xlabel(None)
plt.legend(title='Producto')
ax.tick_params(axis='x', rotation=45)
# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()

# Save a copy of the figure first, then display it like the other exercises do.
plt.savefig('output.png')
plt.show()

### Ex 6 - Tres variables.

In [None]:
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script: 

# dataset = pandas.DataFrame(country, amount, declined)
# dataset = dataset.drop_duplicates()

# Paste or type your script code here:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ex 6 - total amount per country, with the number of declined transactions
# shown through colour and marker size (three variables).
# `dataset` is the dataframe injected by Power BI before this script runs.
df = dataset

# Per-country totals. The columns are selected explicitly: the first
# positional parameter of GroupBy.sum is `numeric_only`, so the previous
# `sum('amount')` did not pick a column. `as_index=False` keeps `country` as a
# regular column so it can be referenced by name in the plot.
# Summing `declined` yields a count - assumes it is a 0/1 flag; TODO confirm.
# NOTE(review): the Power BI preamble drops duplicated rows; identical
# (country, amount, declined) rows are collapsed before reaching these sums.
df = df.groupby('country', as_index=False)[['amount', 'declined']].sum()

# Bucket the declined count. `include_lowest=True` puts countries with zero
# declined transactions in '0-5' (the first interval is otherwise open on the
# left), and the open-ended last edge keeps counts above 50 in '+10' instead
# of turning them into NaN.
df['declined'] = pd.cut(
    df['declined'],
    [0, 5, 10, float('inf')],
    labels=['0-5', '6-10', '+10'],
    include_lowest=True,
)

sns.scatterplot(data=df, x='amount', y='country', palette='magma', hue='declined', size='declined', sizes=(100, 20))

plt.title('Monto de transacciones por país\n y conteo de transacciones rechazadas', fontsize=12)
plt.xlabel('Monto (€)')
plt.ylabel(None)
# Anchor the legend to the right of the axes area.
plt.legend(bbox_to_anchor=(1.3, 0.4), loc='center right', title='Rechazadas')
# Remove all padding so the plot fills the visual.
plt.tight_layout(pad=0, w_pad=0, h_pad=0)

plt.show()

### Ex 7 - Graficar un Pairplot.

In [None]:
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script: 

# dataset = pandas.DataFrame(price, weight, age, amount)
# dataset = dataset.drop_duplicates()

# Paste or type your script code here:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ex 7 - pair plot of the dataset columns, coloured by country.
# `dataset` is the dataframe injected by Power BI before this script runs.
# NOTE(review): this script reads `declined` and `country`, which the preamble
# comment does not list - confirm both fields are passed to the visual.

# Keep the rows whose `declined` flag equals 0, then discard the flag column
# so it is not plotted as one of the pair plot variables.
accepted = dataset.loc[dataset['declined'] == 0]
pairplot_data = accepted.drop(columns=['declined'])

sns.pairplot(data=pairplot_data, hue='country', palette='magma', kind='scatter')

# The title is attached to the current axes and shifted with explicit x/y
# offsets - presumably to place it above the whole grid; verify visually.
plt.title('Pairplot de la BBDD "transactionsnew"', fontsize=15, x=-0.8, y=4.18)

# Remove all padding so the plot fills the visual.
plt.tight_layout(pad=0, w_pad=0, h_pad=0)

plt.show()