### Measure of Frequency: Relative and Cumulative Frequency, Frequency Tables, Frequency Distribution

**Tutorial: An example that visualizes the measure of frequency using a pie chart and a bar chart, shown side by side as subplots**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Tabulate how often each animal was picked
data = {
    "Animal": ["Dog", "Cat", "Cow", "Rabbit"],
    "Frequency": [4, 3, 2, 1],
}
df = pd.DataFrame(data)
animals = df["Animal"]
counts = df["Frequency"]

# One figure holding two side-by-side axes: pie on the left, bars on the right
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(18, 6))

# Left panel: each animal's share of the total, labelled as a percentage
ax1.pie(counts, labels=animals, autopct="%1.1f%%")
ax1.set_title("Pie chart of favorite animals")

# Right panel: raw counts per animal, one colour per bar
bar_colors = ["brown", "orange", "black", "gray"]
ax2.bar(animals, counts, color=bar_colors)
ax2.set_title("Bar chart of favorite animals")
ax2.set_xlabel("Animal")
ax2.set_ylabel("Frequency")

# Write the figure to disk before displaying it
fig.savefig('measure_frequency.jpg', dpi=600, bbox_inches='tight')
plt.show()


**Relative and Cumulative Frequency**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Tabulate how often each animal was picked
data = {
    "Animal": ["Dog", "Cat", "Cow", "Rabbit"],
    "Frequency": [4, 3, 2, 1],
}
df = pd.DataFrame(data)

# Relative frequency: each count as a fraction of the grand total
total = df["Frequency"].sum()
df["Relative Frequency"] = df["Frequency"] / total

# Cumulative frequency: running total of the relative frequencies, in row order
relative = df["Relative Frequency"]
df["Cumulative Frequency"] = relative.cumsum()

# Display the table with both derived columns
print(df)

# One figure holding two side-by-side axes
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

# Left panel: pie chart of the relative frequencies
ax1.pie(df["Relative Frequency"], labels=df["Animal"], autopct="%1.1f%%")
ax1.set_title("Pie chart of relative frequency of favorite animals")

# Right panel: line chart tracing the cumulative frequency across animals
ax2.plot(df["Animal"], df["Cumulative Frequency"], marker="o", color="red")
ax2.set_title("Line chart of cumulative frequency of favorite animals")
ax2.set_xlabel("Animal")
ax2.set_ylabel("Cumulative Frequency")

# Write the figure to disk, then display it
fig.savefig('relative_cummalative.jpg', dpi=600, bbox_inches='tight')
plt.show()

### Measure of Central Tendency

In [11]:
import pandas as pd
import statistics as st

def central_tendency(df, column="Salary (NOK)"):
    """Return the mean, median, and mode of one column of a data frame.

    Args:
        df: A pandas DataFrame containing ``column``.
        column: Name of the numeric column to summarise. Defaults to
            ``"Salary (NOK)"`` so existing one-argument calls keep working.

    Returns:
        A ``(mean, median, mode)`` tuple. The mean and median come from the
        pandas Series methods; the mode comes from ``statistics.mode``.

    Raises:
        KeyError: If ``column`` is not present in ``df``.
        statistics.StatisticsError: If the column is empty (raised by
            ``statistics.mode``).
    """
    # Select the column once so all three statistics read the same values
    values = df[column]

    mean = values.mean()
    median = values.median()
    # Kept on statistics.mode (rather than Series.mode) so the result stays
    # a single scalar, exactly as before.
    mod = st.mode(values)

    return (mean, median, mod)

# Sample salaries (in NOK) for a handful of countries
data = {
    "Country": ["USA", "Norway", "Nepal", "India", "China", "Canada", "Sweden"],
    "Salary (NOK)": [57000, 54000, 50000, 50000, 50000, 53000, 53000],
}
df = pd.DataFrame(data)

# Summarise the salary column and report each statistic on its own line
mean, median, mod = central_tendency(df)
print(
    f"The mean of the salary is {mean} NOK.",
    f"The median of the salary is {median} NOK.",
    f"The mode of the salary is {mod} NOK.",
    sep="\n",
)


The mean of the salary is 52428.57142857143 NOK.
The median of the salary is 53000.0 NOK.
The mode of the salary is 50000 NOK.


### Measures of Variability or Dispersion

**Range**

In [14]:
# Define a data set as a list of numbers
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Find the maximum and minimum values in the data set
max_value = max(data)
min_value = min(data)

# The range is the spread between the largest and smallest observations.
# It is stored as `data_range` so the built-in `range()` is not shadowed
# for every cell that runs afterwards in the same session.
data_range = max_value - min_value

# Print the range
print("Range:", data_range)

Range: 9


**Interquartile range**

In [25]:
import numpy as np

# Sample observations
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Ask for both quartiles in one call: Q1 is the 25th percentile and
# Q3 is the 75th percentile
q1, q3 = np.percentile(data, [25, 75])

# The interquartile range is the width of the middle half of the data
iqr = q3 - q1

print(f"Interquartile range:: {iqr}")

Interquartile range:: 4.5


**Variance**

In [19]:
import statistics

# Sample observations
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Centre of the data
mean = statistics.mean(data)

# Sum of squared deviations from the mean
ssd = sum((value - mean) ** 2 for value in data)

# Population variance: divide by the number of observations (not n - 1)
variance = ssd / len(data)

# Report the result
print("Variance:", variance)

Variance: 8.25


**Standard deviation**

In [18]:
import math

# Sample observations
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
count = len(data)

# Arithmetic mean of the observations
mean = sum(data) / count

# Squared distance of every observation from the mean, then their total
squared_deviations = [(value - mean) ** 2 for value in data]
ssd = sum(squared_deviations)

# Population variance: average squared deviation over all observations
variance = ssd / count

# The standard deviation is the square root of the variance
std = math.sqrt(variance)

# Report the result
print("Standard deviation:", std)

Standard deviation: 2.8722813232690143


**Mean deviation**

In [24]:
# Sample observations
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
count = len(data)

# Arithmetic mean of the observations
mean = sum(data) / count

# Accumulate the absolute distance of every observation from the mean
total_abs_deviation = 0
for value in data:
    total_abs_deviation += abs(value - mean)

# Mean deviation: the average of those absolute distances
mean_deviation = total_abs_deviation / count

# Report the result
print("Mean Deviation:", mean_deviation)

Mean Deviation: 2.5


**Quartile deviation**

### Measures of Association

**Covariance**

In [4]:
import pandas as pd

# Raw scores per student, keyed by column name
scores = {
    "Student": ["A", "B", "C", "D", "E"],
    "Math Score": [80, 70, 60, 50, 40],
    "English Score": [90, 80, 70, 60, 50],
}

# Build the data frame from the raw scores
df = pd.DataFrame(scores)

# Pull out the two score columns and measure how they vary together
math_scores = df["Math Score"]
english_scores = df["English Score"]
covariance = math_scores.cov(english_scores)

# Report the result
print(f"The covariance between math and english score is {covariance}")

The covariance between math and english score is 250.0


**Correlation**

In [6]:
import pandas as pd

# Raw scores per student, keyed by column name
data = {
    "Student": ["A", "B", "C", "D", "E"],
    "Math Score": [80, 70, 60, 50, 40],
    "English Score": [90, 80, 70, 60, 50],
}

# Build the data frame from the raw scores
df = pd.DataFrame(data)

# Correlation between the math and english score columns
math_scores, english_scores = df["Math Score"], df["English Score"]
correlation = math_scores.corr(english_scores)

# Report the result
print("Correlation between math and english score:", correlation)


Correlation between math and english score: 1.0


**Chi-square**

**Cramer's V**

### Distribution: Skewness, Kurtosis, Counting and Grouping Data Values

### Measures of Shape: Plots, Graphs and Charts