# Question1 
Re-analyse the annual maximum gust wind speeds for (I) the years 1952 to 1998, (II) the years 1952 to 1997, i.e. ignore the high value recorded in 1998. Compare the resulting predictions of design wind speeds for (a) 50 years return period, and (b) 1000 years return period, and comment.
# Question2 
Using the parameter estimation approach given in the code GB 50009-2012, predict the design wind speeds for return periods of 10, 20, 50, 100, 200, 500 and 1000 years for the above two cases, and compare the results with those of the Gumbel approach.


In [3]:

# Import the required libraries; pandas is used for data handling
import pandas as pd
from io import StringIO # location of StringIO in Python 3

# Load the data: CSV text with one row per year (1952-1998) and the
# wind speed in m/s for that year
data = """
year,Wind speed m/s
1952,31.4
1953,33.4
1954,29.8
1955,30.3
1956,27.8
1957,30.3
1958,29.3
1959,36.5
1960,29.3
1961,27.3
1962,31.9
1963,28.8
1964,25.2
1965,27.3
1966,23.7
1967,27.8
1968,32.4
1969,27.8
1970,26.2
1971,30.9
1972,31.9
1973,27.3
1974,25.7
1975,32.9
1976,28.3
1977,27.3
1978,28.3
1979,28.3
1980,29.3
1981,27.8
1982,27.8
1983,30.9
1984,26.7
1985,30.3
1986,28.3
1987,30.3
1988,34
1989,28.8
1990,30.3
1991,27.3
1992,27.8
1993,28.8
1994,30.9
1995,26.2
1996,25.7
1997,24.7
1998,42.2
"""

# Wrap the CSV text in a StringIO object and parse it into a DataFrame
df = pd.read_csv(StringIO(data)) 


# Answer

## Question1

In [1]:
import pandas as pd
import numpy as np
from io import StringIO

# Load the data: CSV text with one row per year (1952-1998) and the
# wind speed in m/s for that year
data = """
year,Wind speed m/s
1952,31.4
1953,33.4
1954,29.8
1955,30.3
1956,27.8
1957,30.3
1958,29.3
1959,36.5
1960,29.3
1961,27.3
1962,31.9
1963,28.8
1964,25.2
1965,27.3
1966,23.7
1967,27.8
1968,32.4
1969,27.8
1970,26.2
1971,30.9
1972,31.9
1973,27.3
1974,25.7
1975,32.9
1976,28.3
1977,27.3
1978,28.3
1979,28.3
1980,29.3
1981,27.8
1982,27.8
1983,30.9
1984,26.7
1985,30.3
1986,28.3
1987,30.3
1988,34
1989,28.8
1990,30.3
1991,27.3
1992,27.8
1993,28.8
1994,30.9
1995,26.2
1996,25.7
1997,24.7
1998,42.2
"""

df = pd.read_csv(StringIO(data))

# Split the data into the two cases to be analysed
df_I = df.copy()  # case I: includes 1998
df_II = df[df['year'] < 1998]  # case II: excludes 1998

# Euler-Mascheroni constant, rounded to four decimals
gamma = 0.5772


def estimate_gumbel_params(speed_data):
    """Fit the Gumbel location and scale parameters by the method of moments.

    The scale is ``beta = sqrt(6) * s / pi`` with ``s`` the sample standard
    deviation (``ddof=1``), and the location is ``mu = mean - gamma * beta``.

    Returns:
        A ``(mu, beta)`` tuple.
    """
    sample_std = np.std(speed_data, ddof=1)  # unbiased (n - 1) estimate
    scale = np.sqrt(6) * sample_std / np.pi
    location = np.mean(speed_data) - gamma * scale
    return location, scale

# Fit the Gumbel parameters for both cases (I: with 1998, II: without 1998)
(mu_I, beta_I), (mu_II, beta_II) = (
    estimate_gumbel_params(frame['Wind speed m/s']) for frame in (df_I, df_II)
)

def design_wind_speed(T, mu, beta):
    """Return the Gumbel design wind speed for a return period of T years.

    The annual non-exceedance probability is ``F = 1 - 1/T`` and the design
    value is the Gumbel quantile ``mu - beta * ln(-ln(F))``.

    Args:
        T: Return period (scalar).
        mu: Gumbel location parameter.
        beta: Gumbel scale parameter.

    Returns:
        The design wind speed, or ``np.inf`` when ``T <= 1``.
    """
    if T <= 1:
        return np.inf
    # Evaluate ln(F) = ln(1 - 1/T) with log1p: forming 1 - 1/T first rounds
    # to exactly 1.0 for very long return periods, which would turn the
    # quantile into infinity.
    log_F = np.log1p(-1.0 / T)
    return mu - beta * np.log(-log_F)

# Design wind speeds at the 50-year and 1000-year return periods, per case
T50_I, T1000_I = (design_wind_speed(T, mu_I, beta_I) for T in (50, 1000))
T50_II, T1000_II = (design_wind_speed(T, mu_II, beta_II) for T in (50, 1000))

# Report the fitted parameters first
print("情况I（包含1998年）参数：μ = {:.2f}, β = {:.2f}".format(mu_I, beta_I))
print("情况II（排除1998年）参数：μ = {:.2f}, β = {:.2f}\n".format(mu_II, beta_II))

# 50-year comparison between the two cases
print("50年重现期设计风速对比：")
print("情况I: {:.2f} m/s".format(T50_I))
print("情况II: {:.2f} m/s\n".format(T50_II))

# 1000-year comparison between the two cases
print("1000年重现期设计风速对比：")
print("情况I: {:.2f} m/s".format(T1000_I))
print("情况II: {:.2f} m/s".format(T1000_II))

情况I（包含1998年）参数：μ = 27.83, β = 2.49
情况II（排除1998年）参数：μ = 27.82, β = 2.01

50年重现期设计风速对比：
情况I: 37.55 m/s
情况II: 35.67 m/s

1000年重现期设计风速对比：
情况I: 45.04 m/s
情况II: 41.71 m/s


## Comments:

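Including the high value recorded in 1998 (42.2 m/s) leaves the Gumbel location parameter essentially unchanged (μ ≈ 27.8 m/s) but raises the scale parameter β from 2.01 to 2.49, which results in significantly higher design wind speeds: 37.55 m/s instead of 35.67 m/s (about 5 % higher) for the 50-year return period, and 45.04 m/s instead of 41.71 m/s (about 8 % higher) for the 1000-year return period. The longer the return period, the more sensitive the prediction is to this single extreme value. More comprehensive samples (a longer record) will help to better determine the design wind speed.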

## Question2

In [6]:
import pandas as pd
import numpy as np
from io import StringIO

# Load the data: CSV text with one row per year (1952-1998) and the
# wind speed in m/s for that year
data = """
year,Wind speed m/s
1952,31.4
1953,33.4
1954,29.8
1955,30.3
1956,27.8
1957,30.3
1958,29.3
1959,36.5
1960,29.3
1961,27.3
1962,31.9
1963,28.8
1964,25.2
1965,27.3
1966,23.7
1967,27.8
1968,32.4
1969,27.8
1970,26.2
1971,30.9
1972,31.9
1973,27.3
1974,25.7
1975,32.9
1976,28.3
1977,27.3
1978,28.3
1979,28.3
1980,29.3
1981,27.8
1982,27.8
1983,30.9
1984,26.7
1985,30.3
1986,28.3
1987,30.3
1988,34
1989,28.8
1990,30.3
1991,27.3
1992,27.8
1993,28.8
1994,30.9
1995,26.2
1996,25.7
1997,24.7
1998,42.2
"""
# Parse the CSV text into a DataFrame
df = pd.read_csv(StringIO(data))

# ================== Gumbel method ==================
def estimate_gumbel_params(speed_data):
    """Estimate the Gumbel parameters by the method of moments.

    The scale is ``beta = sqrt(6) * s / pi`` with ``s`` the sample standard
    deviation (``ddof=1``), and the location is ``mu = mean - 0.5772 * beta``.

    Returns:
        A ``(mu, beta)`` tuple.
    """
    sample_std = np.std(speed_data, ddof=1)  # unbiased (n - 1) estimate
    scale = np.sqrt(6) * sample_std / np.pi
    location = np.mean(speed_data) - 0.5772 * scale
    return location, scale

# ================== GB code method ==================
# Coefficients of code Table E.3.2, stored as rows of [n, C1, C2], where n is
# the sample size the row applies to.
# The C1 entry for n=90 is 1.20073 (the Gumbel reduced standard deviation for
# that sample size). The value 1.20649 belongs to n=100 only: C1 increases
# strictly with n, so the two rows cannot share a value.
table_E32 = [
    [10, 0.9497, 0.4952], [15, 1.02057, 0.5182],
    [20, 1.06283, 0.52355], [25, 1.09145, 0.53086],
    [30, 1.11238, 0.53622], [35, 1.12847, 0.54034],
    [40, 1.14132, 0.54362], [45, 1.15185, 0.54630],
    [50, 1.16066, 0.54853], [60, 1.17465, 0.55208],
    [70, 1.18536, 0.55477], [80, 1.19385, 0.55688],
    [90, 1.20073, 0.5586], [100, 1.20649, 0.56002],
    [250, 1.24292, 0.56878], [500, 1.2588, 0.57240],
    [1000, 1.26851, 0.57450], [np.inf, 1.28255, 0.57722]
]

def interpolate_C1C2(n, table=None):
    """Look up the coefficients C1 and C2 for sample size n by linear interpolation.

    Args:
        n: Sample size.
        table: Optional rows of ``[n, C1, C2]`` sorted by increasing n. When
            omitted, the module-level ``table_E32`` is used.

    Returns:
        A ``(C1, C2)`` tuple. Below the first row or at/above the last
        finite row the tabulated values of that row are returned unchanged;
        in between, the linearly interpolated values are rounded to five
        decimals.
    """
    rows = table_E32 if table is None else table
    # Interpolating towards the n = infinity row is undefined, so only the
    # finite rows take part; the clamp bounds come from the table itself.
    finite_rows = [row for row in rows if np.isfinite(row[0])]

    first, last = finite_rows[0], finite_rows[-1]
    if n <= first[0]:
        return first[1], first[2]
    if n >= last[0]:
        return last[1], last[2]

    # Walk over neighbouring rows to find the bracketing interval
    for (n_lo, c1_lo, c2_lo), (n_hi, c1_hi, c2_hi) in zip(finite_rows, finite_rows[1:]):
        if n_lo <= n < n_hi:
            frac = (n - n_lo) / (n_hi - n_lo)
            C1 = c1_lo + frac * (c1_hi - c1_lo)
            C2 = c2_lo + frac * (c2_hi - c2_lo)
            return round(C1, 5), round(C2, 5)

    # Only reached when no comparison above holds (e.g. n is NaN)
    return rows[-1][1], rows[-1][2]

def gb_parameters(speed_data):
    """Compute the code-method parameters alpha and u for a sample.

    The coefficients C1 and C2 are looked up for the sample size via
    ``interpolate_C1C2``; then ``alpha = C1 / s`` with ``s`` the sample
    standard deviation (``ddof=1``) and ``u = mean - C2 / alpha``.

    Returns:
        An ``(alpha, u)`` tuple.
    """
    sample_size = len(speed_data)
    C1, C2 = interpolate_C1C2(sample_size)

    alpha = C1 / np.std(speed_data, ddof=1)  # unbiased (n - 1) estimate
    return alpha, np.mean(speed_data) - C2 / alpha

# ================== Computation and comparison ==================
# Case I keeps every year (n=47); case II drops the 1998 record (n=46)
df_I = df.copy()
df_II = df[df['year'] < 1998]

# Gumbel parameters (mu, beta) for both cases
(mu_I, beta_I), (mu_II, beta_II) = (
    estimate_gumbel_params(frame['Wind speed m/s']) for frame in (df_I, df_II)
)

# Code-method parameters (alpha, u) for both cases
(alpha_gb_I, u_gb_I), (alpha_gb_II, u_gb_II) = (
    gb_parameters(frame['Wind speed m/s']) for frame in (df_I, df_II)
)

# Design wind speed functions
def design_wind_speed(T, mu, beta):
    """Return the Gumbel-method design wind speed for a return period T.

    Evaluates the Gumbel quantile ``mu - beta * ln(-ln(1 - 1/T))``.

    Args:
        T: Return period (scalar).
        mu: Gumbel location parameter.
        beta: Gumbel scale parameter.

    Returns:
        The design wind speed, or ``np.inf`` when ``T <= 1``.
    """
    if T <= 1:
        return np.inf
    # log1p keeps ln(1 - 1/T) accurate: forming 1 - 1/T first rounds to
    # exactly 1.0 for very long return periods and yields an infinite result.
    log_F = np.log1p(-1.0 / T)
    return mu - beta * np.log(-log_F)

def design_speed_gb(T, alpha, u):
    """Return the code-method design wind speed for a return period T.

    Evaluates ``u - (1 / alpha) * ln(-ln(1 - 1/T))``.

    Args:
        T: Return period (scalar).
        alpha: Parameter alpha as returned by ``gb_parameters``.
        u: Parameter u as returned by ``gb_parameters``.

    Returns:
        The design wind speed, or ``np.inf`` when ``T <= 1``.
    """
    if T <= 1:
        return np.inf
    # log1p keeps ln(1 - 1/T) accurate: forming 1 - 1/T first rounds to
    # exactly 1.0 for very long return periods and yields an infinite result.
    log_F = np.log1p(-1.0 / T)
    return u - (1 / alpha) * np.log(-log_F)

# Return periods to evaluate
T_values = [10, 20, 50, 100, 200, 500, 1000]

# Comparison tables, built column by column: one row per return period,
# with the Gumbel and code-method design speeds side by side
result_I = pd.DataFrame({
    "Return Period": T_values,
    "Gumbel": [design_wind_speed(T, mu_I, beta_I) for T in T_values],
    "GB Method": [design_speed_gb(T, alpha_gb_I, u_gb_I) for T in T_values],
})

result_II = pd.DataFrame({
    "Return Period": T_values,
    "Gumbel": [design_wind_speed(T, mu_II, beta_II) for T in T_values],
    "GB Method": [design_speed_gb(T, alpha_gb_II, u_gb_II) for T in T_values],
})

# Print the results: for each case a separator line, the fitted parameters
# of both methods, and the comparison table rounded to two decimals
print("="*45)
print("情况I（包含1998年，n=47）参数对比：")
print(f"Gumbel参数: μ={mu_I:.2f}, β={beta_I:.2f}")
print(f"GB方法参数: α={alpha_gb_I:.4f}, u={u_gb_I:.2f}\n")
print(result_I.round(2))

print("\n" + "="*45)
print("情况II（排除1998年，n=46）参数对比：")
print(f"Gumbel参数: μ={mu_II:.2f}, β={beta_II:.2f}")
print(f"GB方法参数: α={alpha_gb_II:.4f}, u={u_gb_II:.2f}\n")
print(result_II.round(2))

情况I（包含1998年，n=47）参数对比：
Gumbel参数: μ=27.83, β=2.49
GB方法参数: α=0.3615, u=27.75

   Return Period  Gumbel  GB Method
0             10   33.44      33.98
1             20   35.23      35.97
2             50   37.55      38.55
3            100   39.29      40.48
4            200   41.03      42.40
5            500   43.31      44.94
6           1000   45.04      46.86

情况II（排除1998年，n=46）参数对比：
Gumbel参数: μ=27.82, β=2.01
GB方法参数: α=0.4475, u=27.76

   Return Period  Gumbel  GB Method
0             10   32.35      32.79
1             20   33.79      34.40
2             50   35.67      36.48
3            100   37.07      38.04
4            200   38.47      39.60
5            500   40.31      41.65
6           1000   41.71      43.20
