In [47]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis
import pandas as pd

In [48]:
cases = [
  {
    'edges': np.array([21, 23, 25, 27, 29, 31]),
    'frequency': np.array([30, 70, 65, 30, 5]),
  },
  {
    'edges': np.array([40, 45, 50, 55, 60, 60]),
    'frequency': np.array([50, 100, 105, 40, 5]),
  },
  {
    'edges': np.array([100, 105, 110, 115, 120, 125]),
    'frequency': np.array([45, 105, 100, 40, 10]),
  },
  {
    'edges': np.array([10, 15, 20, 25, 30, 35]),
    'frequency': np.array([60, 140, 135, 55, 10]),
  },
  {
    'edges': np.array([80, 90, 100, 110, 120, 130]),
    'frequency': np.array([80, 165, 170, 65, 20]),
  },
  {
    'edges': np.array([140, 145, 150, 155, 160, 165]),
    'frequency': np.array([60, 140, 135, 55, 10]),
  },
  {
    'edges': np.array([170, 185, 200, 215, 230, 245]),
    'frequency': np.array([80, 165, 170, 65, 20]),
  },
  {
    'edges': np.array([490, 495, 500, 505, 510, 515]),
    'frequency': np.array([110, 240, 235, 95, 20]),
  },
  {
    'edges': np.array([130, 150, 170, 190, 210, 230]),
    'frequency': np.array([95, 200, 205, 80, 20]),
  },
  {
    'edges': np.array([150, 175, 200, 225, 250, 275]),
    'frequency': np.array([110, 240, 235, 95, 20]),
  },
]

In [49]:
def gen_hist(case_num, edges, frequency):
  bins = (edges[:-1] + edges[1:]) / 2
  width = np.diff(edges)

  plt.bar(bins, frequency, width=width, edgecolor='black', alpha=0.7)
  plt.xlabel('x')
  plt.ylabel('Frequency')

  plt.savefig(f'graphics/{case_num}_hist.png')
  plt.clf()

def gen_cdf(case_num, edges, frequency):
  cdf = np.cumsum(frequency) / np.sum(frequency)

  # добавляем 0 в начало для корректного отображения графика
  cdf = np.insert(cdf, 0, 0)

  plt.plot(edges, cdf, drawstyle='steps-post', color='blue')
  plt.xlabel('x')
  plt.ylabel('F_n(x)')
  plt.grid(True)

  plt.savefig(f'graphics/{case_num}_cdf.png')
  plt.clf()

def calc_chars(case_num, edges, frequency):
  bins = (edges[:-1] + edges[1:]) / 2
  data = np.repeat(bins, frequency)

  weighted_bins = frequency * bins
  sample_mean = np.sum(weighted_bins) / np.sum(frequency)

  sample_variance = np.sum((bins - sample_mean)**2 * frequency) / np.sum(frequency)

  sample_median = np.median(data)

  sample_cv = np.sqrt(sample_variance) / sample_mean

  sample_skewness = skew(data)
  sample_kurtosis = kurtosis(data)

  data_row = {
    'case': [case_num],
    'mean': [sample_mean],
    'variance': [sample_variance],
    'median': [sample_median],
    'cv': [sample_cv],
    'skewness': [sample_skewness],
    'kurtosis': [sample_kurtosis],
  }
  return pd.DataFrame(data_row)

def calc_fluctuated_chars(case_num, edges, frequency):
  bins = (edges[:-1] + edges[1:]) / 2
  data = np.repeat(bins, frequency)
  fluctuation = 1000
  data_fluctuated = np.append(data, np.ones(1000)*fluctuation)

  fluctuated_mean = np.mean(data_fluctuated)
  fluctuated_variance = np.var(data_fluctuated)
  fluctuated_cv = np.std(data_fluctuated) / fluctuated_mean
  fluctuated_median = np.median(data_fluctuated)
  fluctuated_skewness = skew(data_fluctuated)
  fluctuated_kurtosis = kurtosis(data_fluctuated)

  data_row = {
    'case': [case_num],
    'mean': [fluctuated_mean],
    'variance': [fluctuated_variance],
    'median': [fluctuated_median],
    'cv': [fluctuated_cv],
    'skewness': [fluctuated_skewness],
    'kurtosis': [fluctuated_kurtosis],
  }
  return pd.DataFrame(data_row)

chars_df = pd.DataFrame(
  columns=['case', 'mean', 'variance', 'median', 'cv', 'skewness', 'kurtosis'],
)
fluctuated_chars_df = pd.DataFrame(
  columns=['case', 'mean', 'variance', 'median', 'cv', 'skewness', 'kurtosis'],
)

for index, case in enumerate(cases):
  case_num = index + 1
  edges = case['edges']
  frequency = case['frequency']

  gen_hist(case_num, edges, frequency)
  gen_cdf(case_num, edges, frequency)

  chars_df = pd.concat([chars_df, calc_chars(case_num, edges, frequency)])
  fluctuated_chars_df = pd.concat([fluctuated_chars_df, calc_fluctuated_chars(case_num, edges, frequency)])

chars_df.to_csv('chars.csv')
fluctuated_chars_df.to_csv('fluctuated_schars.csv')

  chars_df = pd.concat([chars_df, calc_chars(case_num, edges, frequency)])
  fluctuated_chars_df = pd.concat([fluctuated_chars_df, calc_fluctuated_chars(case_num, edges, frequency)])


<Figure size 640x480 with 0 Axes>