# 第2章 確率・統計の基礎

In [None]:
import os

# Directory holding the sensor logs, relative to the current working directory.
DATA_DIR = "LNPR_BOOK_CODES/sensor_data/"

# Move into the data directory so that later cells can open files by bare name.
# DATA_DIR is relative, so re-running this cell after a successful chdir would
# look for DATA_DIR inside itself and raise FileNotFoundError; skip the chdir
# when the working directory already ends with DATA_DIR.
_data_dir_suffix = os.sep + os.path.normpath(DATA_DIR)
if not os.path.normpath(os.getcwd()).endswith(_data_dir_suffix):
    os.chdir(DATA_DIR)

## 2.1 センサデータの収集とJupyter Notebook上での準備

In [None]:
import pandas as pd

# Read the space-separated sensor log. The file has no header row, so the
# four columns are named here: date, time, ir and lidar.
data = pd.read_csv(
    "sensor_data_200.txt",
    sep=" ",
    header=None,
    names=["date", "time", "ir", "lidar"],
)
data

In [None]:
# Show the first five LiDAR readings.
print(data["lidar"].head(5))

## 2.2 度数分布と確率分布

### 2.2.1 ヒストグラムの描画

In [None]:
import matplotlib.pyplot as plt

# Histogram of the LiDAR readings with one unit-wide bin per value.
# Passing bins=max-min splits [min, max] into max-min bins, so the closed last
# bin lumps the two largest values together (and bins=0 fails when max == min).
# Explicit edges min, min+1, ..., max+1 give every value its own bin.
# Assumes the "lidar" column holds integer readings.
lidar_bin_edges = list(range(int(data["lidar"].min()), int(data["lidar"].max()) + 2))
data["lidar"].hist(bins=lidar_bin_edges, align="left")
plt.show()

### 2.2.3 雑音の数値化

In [None]:
# Mean of the LiDAR readings, computed two ways for comparison.
lidar_values = data["lidar"].values
mean1 = sum(lidar_values) / len(lidar_values)  # by hand: total divided by count
mean2 = data["lidar"].mean()                   # pandas built-in

print("素朴な方法", mean1, sep="\n")
print("pandasのメソッド", mean2, sep="\n")

In [None]:
# Histogram of the LiDAR readings in orange, with the mean drawn as a red
# vertical line from y=0 to y=5000.
# Passing bins=max-min splits [min, max] into max-min bins, so the closed last
# bin lumps the two largest values together (and bins=0 fails when max == min).
# Explicit edges min, min+1, ..., max+1 give every value its own bin.
# Assumes the "lidar" column holds integer readings.
lidar_bin_edges = list(range(int(data["lidar"].min()), int(data["lidar"].max()) + 2))
data["lidar"].hist(bins=lidar_bin_edges, color="orange", align="left")   ###avgplot###
plt.vlines(mean1, ymin=0, ymax=5000, color="red")
plt.show()

In [None]:
import numpy as np

# Compute from the definition                     ### calcvar
zs = data["lidar"].values
mean = sum(zs) / len(zs)
diff_square = [(z - mean) ** 2 for z in zs]  # squared deviation of each reading

sampling_var = sum(diff_square) / len(zs)        # sample variance (divide by N)
unbiased_var = sum(diff_square) / (len(zs) - 1)  # unbiased variance (divide by N-1)

print("素朴な方法")
print("標本分散", sampling_var)
print("不偏分散", unbiased_var)

# Using pandas. ddof is an integer offset subtracted from N, so pass 0 rather
# than False (which only worked because False == 0).
pandas_sampling_var = data["lidar"].var(ddof=0)  # sample variance
pandas_default_var = data["lidar"].var()         # default (unbiased variance)

print("Pandasのメソッド")
print("標本分散",pandas_sampling_var)
print("不偏分散", pandas_default_var)

# Using NumPy
numpy_default_var = np.var(data["lidar"])           # default (sample variance)
numpy_unbiased_var = np.var(data["lidar"], ddof=1)  # unbiased variance

print("Numpyのメソッド")
print("標本分散", numpy_default_var)
print("不偏分散", numpy_unbiased_var)

In [None]:
import math ###  calcstddev

# Using pandas (std() with its default arguments)
pandas_stddev = data["lidar"].std()

# From the definition: square roots of the two variances computed earlier
stddev1, stddev2 = (math.sqrt(v) for v in (sampling_var, unbiased_var))

print("素朴な方法")
print("標本分散を利用", stddev1)
print("不偏分散を利用", stddev2)
print("Pandasのメソッド", pandas_stddev)

### 2.2.4 (素朴な)確率分布

In [None]:
# Frequency table: one row per distinct LiDAR value, with its count in a
# column named "lidar". The column is named explicitly because pandas >= 2.0
# names the value_counts() result "count" (and its index "lidar"), which makes
# a lookup of freqs["lidar"] raise KeyError.
freqs = data["lidar"].value_counts().to_frame(name="lidar")  ###freqs###
freqs.index.name = None  # keep the index unnamed, as pandas < 2.0 produced
freqs.transpose()  # display horizontally

In [None]:
# Add a "probs" column: each value's count divided by the number of readings.
# The count column is taken by position because its label depends on the
# pandas version when freqs is built from value_counts() ("lidar" before 2.0,
# "count" from 2.0 on); freqs["lidar"] raises KeyError on the latter.
freqs["probs"] = freqs.iloc[:, 0] / len(data["lidar"])  ###addprobs###
freqs.transpose()

In [None]:
# Add up the "probs" column of freqs and display the total.
sum(freqs.loc[:, "probs"])  ###confirmsum###

In [None]:
# Bar chart of the probabilities, ordered by the index of freqs.
probs_by_value = freqs["probs"].sort_index()
probs_by_value.plot(kind="bar", color="blue")   ###probdist###
plt.show()

### 2.2.5 確率分布を用いたシミュレーション

In [None]:
def drawing():  ###one_sampling###
    """Draw one index label from ``freqs`` at random.

    A single row is sampled with the "probs" column as the sampling weights,
    and that row's index label is returned. Wrapped in a function because the
    expression is awkward to repeat inline.
    """
    picked_row = freqs.sample(n=1, weights="probs")
    return picked_row.index[0]

drawing()  # run once

In [None]:
# samples = [ drawing() for i in range(len(data))] ### sampling_simulation ###
samples = [drawing() for _ in range(1000)]  # while developing, use this instead of the line above
simulated = pd.DataFrame(samples, columns=["lidar"])
p = simulated["lidar"]

# Histogram of the simulated values with one unit-wide bin per value.
# Passing bins=max-min splits [min, max] into max-min bins, so the closed last
# bin lumps the two largest values together (and bins=0 fails when max == min).
# Explicit edges min, min+1, ..., max+1 give every value its own bin.
# Assumes drawing() returns integer values.
simulated_bin_edges = list(range(int(p.min()), int(p.max()) + 2))
p.hist(bins=simulated_bin_edges, color="orange", align="left")
plt.show()