In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import interpolate
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense
import pandas as pd
# 加载plt文件（2D数据）

In [2]:
def plt_to_dataframe(plt_path, **read_csv_kwargs):
    """Load a ``.plt`` text file into a DataFrame via ``pandas.read_csv``.

    Parameters
    ----------
    plt_path : str, path-like or file-like
        Location of the file (anything ``pandas.read_csv`` accepts).
    **read_csv_kwargs
        Optional keyword arguments forwarded unchanged to ``pandas.read_csv``,
        e.g. ``skiprows=[0, 2]`` or ``low_memory=False``. With no extra
        arguments the call is exactly ``pd.read_csv(plt_path)``.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    return pd.read_csv(plt_path, **read_csv_kwargs)

# Relative path of the .plt file to load.
plt_path = './data/out.000001.plt'
# Parse the file as comma-separated text. Physical lines 0 and 2 are dropped,
# so line 1 supplies the column labels.
# NOTE(review): lines 0 and 2 are presumably non-tabular header lines of the
# .plt file — confirm against the file itself.
# The data rows contain no commas, so each row lands whole in the first column
# and is split on whitespace in later cells.
# low_memory=False makes pandas parse the file in one pass instead of in chunks.
df = pd.read_csv(plt_path, skiprows=[0, 2], low_memory=False)

In [3]:
df

Unnamed: 0,"VARIABLES = ""X""","""Y""","""H""","""uc""","""vc"""
0,112.52654761422502 34.89041200610437 ...,,,,
1,112.5237073026904 34.88683509653841 0...,,,,
2,113.42018405060665 34.990639143309394 ...,,,,
3,113.41733836472532 34.9829274662707 0...,,,,
4,113.46590499249639 34.961568791623286 ...,,,,
...,...,...,...,...,...
871594,426588 426587 426256 426257,,,,
871595,426586 426585 426193 426194,,,,
871596,426580 426579 426578 426581,,,,
871597,426577 426576 426267 426268,,,,


In [4]:
df.columns

Index([' VARIABLES = "X"', ' "Y"', '  "H"', '  "uc"', ' "vc" '], dtype='object')

In [5]:
# Remove every double-quote character from the column labels.
df.columns = [label.replace('"', '') for label in df.columns]

In [6]:
df.columns

Index([' VARIABLES = X', ' Y', '  H', '  uc', ' vc '], dtype='object')

In [7]:
# Remove every space character from the column labels.
df.columns = [label.replace(' ', '') for label in df.columns]

In [8]:
df.columns

Index(['VARIABLES=X', 'Y', 'H', 'uc', 'vc'], dtype='object')

In [9]:
# Give the first column its plain name: 'VARIABLES=X' becomes 'X'.
df = df.rename(columns={'VARIABLES=X': 'X'})

In [10]:
df.columns

Index(['X', 'Y', 'H', 'uc', 'vc'], dtype='object')

In [11]:
df

Unnamed: 0,X,Y,H,uc,vc
0,112.52654761422502 34.89041200610437 ...,,,,
1,112.5237073026904 34.88683509653841 0...,,,,
2,113.42018405060665 34.990639143309394 ...,,,,
3,113.41733836472532 34.9829274662707 0...,,,,
4,113.46590499249639 34.961568791623286 ...,,,,
...,...,...,...,...,...
871594,426588 426587 426256 426257,,,,
871595,426586 426585 426193 426194,,,,
871596,426580 426579 426578 426581,,,,
871597,426577 426576 426267 426268,,,,


In [12]:
# Trim whitespace from both ends of every string in column 'X'.
# str.strip() removes trailing as well as leading whitespace, so a later
# whitespace split does not yield an empty first or last field.
df['X'] = df['X'].str.strip()

In [15]:
df.head()

Unnamed: 0,X,Y,H,uc,vc
0,112.52654761422502 34.89041200610437 0....,,,,
1,112.5237073026904 34.88683509653841 0.0...,,,,
2,113.42018405060665 34.990639143309394 0...,,,,
3,113.41733836472532 34.9829274662707 0.0...,,,,
4,113.46590499249639 34.961568791623286 0...,,,,


In [16]:
# Break each 'X' string into one column per whitespace-separated field.
# The pattern \s+ matches a run of one or more whitespace characters.
# It is written as a raw string: in a plain literal '\s' is an invalid escape
# sequence, which Python reports with a warning (SyntaxWarning on 3.12+).
split_df = df['X'].str.split(r'\s+', expand=True)

In [17]:
split_df

Unnamed: 0,0,1,2,3,4
0,112.52654761422502,34.89041200610437,0.0000,0.0000,0.0000
1,112.5237073026904,34.88683509653841,0.0000,0.0000,0.0000
2,113.42018405060665,34.990639143309394,0.0000,0.0000,0.0000
3,113.41733836472532,34.9829274662707,0.0000,0.0000,0.0000
4,113.46590499249639,34.961568791623286,0.0000,0.0000,0.0000
...,...,...,...,...,...
871594,426588,426587,426256,426257,
871595,426586,426585,426193,426194,
871596,426580,426579,426578,426581,
871597,426577,426576,426267,426268,


In [19]:
# Only the leading 430293 rows hold point records, so keep just those rows.
split_df = split_df.head(430293)

In [20]:
split_df

Unnamed: 0,0,1,2,3,4
0,112.52654761422502,34.89041200610437,0.0000,0.0000,0.0000
1,112.5237073026904,34.88683509653841,0.0000,0.0000,0.0000
2,113.42018405060665,34.990639143309394,0.0000,0.0000,0.0000
3,113.41733836472532,34.9829274662707,0.0000,0.0000,0.0000
4,113.46590499249639,34.961568791623286,0.0000,0.0000,0.0000
...,...,...,...,...,...
430288,118.14183603214005,37.380475527323256,0.0000,0.0000,0.0000
430289,118.14297793236382,37.38065612079234,0.0000,0.0000,0.0000
430290,118.14486731116918,37.380781879782745,0.0000,0.0000,0.0000
430291,118.14294961755947,37.37978589478041,0.0000,0.0000,0.0000


In [21]:
# Cast each column of text fields to a numeric dtype.
# errors='coerce' turns any field that cannot be parsed into NaN instead of raising.
split_df = split_df.apply(lambda column: pd.to_numeric(column, errors='coerce'))
# split_df.dtypes

In [22]:
# Write the parsed fields back into df for the point rows only
# (.loc slicing is label-based and inclusive, so :430292 covers 430293 rows).
# Field i of split_df fills the i-th column named below.
for field_no, column_name in enumerate(['X', 'Y', 'H', 'uc', 'vc']):
    df.loc[:430292, column_name] = split_df[field_no]

In [24]:
df

Unnamed: 0,X,Y,H,uc,vc
0,112.526548,34.890412,0.0,0.0,0.0
1,112.523707,34.886835,0.0,0.0,0.0
2,113.420184,34.990639,0.0,0.0,0.0
3,113.417338,34.982927,0.0,0.0,0.0
4,113.465905,34.961569,0.0,0.0,0.0
...,...,...,...,...,...
871594,426588 426587 426256 426257,,,,
871595,426586 426585 426193 426194,,,,
871596,426580 426579 426578 426581,,,,
871597,426577 426576 426267 426268,,,,


In [27]:
# Sequence 1..430293: one ID per point row.
id_values = range(1, 430294)

# Put an 'ID' column in front of 'X'. The Series is aligned on the index, so
# only the first 430293 rows receive a value and the remaining rows get NaN.
# df.insert raises ValueError when the column already exists, so the insert is
# skipped if this cell is re-run.
if 'ID' not in df.columns:
    df.insert(loc=0, column='ID', value=pd.Series(id_values))

# Rows without an ID (NaN) are marked with 0, then the column is stored as int.
df['ID'] = df['ID'].fillna(0).astype(int)

df

Unnamed: 0,ID,X,Y,H,uc,vc
0,1,112.526548,34.890412,0.0,0.0,0.0
1,2,112.523707,34.886835,0.0,0.0,0.0
2,3,113.420184,34.990639,0.0,0.0,0.0
3,4,113.417338,34.982927,0.0,0.0,0.0
4,5,113.465905,34.961569,0.0,0.0,0.0
...,...,...,...,...,...,...
871594,0,426588 426587 426256 426257,,,,
871595,0,426586 426585 426193 426194,,,,
871596,0,426580 426579 426578 426581,,,,
871597,0,426577 426576 426267 426268,,,,


In [28]:
df['ID'].dtype

dtype('int32')

In [29]:
# DataFrame holding the point records: rows labelled 0..430292 (inclusive).
# Column 'X' of df is object dtype because the same column still carries the
# topology strings further down. astype converts it to float64 for the point
# rows and returns a new frame, so points_df is independent of df.
points_df = df.loc[:430292, :].astype({'X': 'float64'})
points_df

Unnamed: 0,ID,X,Y,H,uc,vc
0,1,112.526548,34.890412,0.0,0.0,0.0
1,2,112.523707,34.886835,0.0,0.0,0.0
2,3,113.420184,34.990639,0.0,0.0,0.0
3,4,113.417338,34.982927,0.0,0.0,0.0
4,5,113.465905,34.961569,0.0,0.0,0.0
...,...,...,...,...,...,...
430288,430289,118.141836,37.380476,0.0,0.0,0.0
430289,430290,118.142978,37.380656,0.0,0.0,0.0
430290,430291,118.144867,37.380782,0.0,0.0,0.0
430291,430292,118.14295,37.379786,0.0,0.0,0.0


In [47]:
# Count the point rows whose H value is exactly 0.
# NOTE(review): the variable name says "nonzero", but the condition tested is
# H == 0 — confirm which of the two is intended.
count_nonzero_h = points_df['H'].eq(0).sum()
count_nonzero_h

235900

In [48]:
# Count the point rows where H is 0 while at least one of uc / vc is non-zero.
count_nonzero_h_uc_vc = (
    points_df['H'].eq(0)
    & (points_df['uc'].ne(0) | points_df['vc'].ne(0))
).sum()
count_nonzero_h_uc_vc

0

In [43]:
# Count the point rows whose uc value is strictly greater than 0
# (negative values are not counted, despite "nonzero" in the variable name).
count_nonzero_uc = points_df['uc'].gt(0).sum()
count_nonzero_uc

169928

In [44]:
# Count the point rows whose vc value is strictly greater than 0
# (negative values are not counted, despite "nonzero" in the variable name).
count_nonzero_vc = points_df['vc'].gt(0).sum()
count_nonzero_vc

151304

In [30]:
# # 输出 'ID' 列值为 1, 3829, 29426, 3857 的行
# result_df = df.loc[df['ID'].isin([1, 3829, 29426, 3857]), :]

# # 现在 result_df 包含了 ID 为 1, 3829, 29426, 3857 的行
# print(result_df)

In [31]:
# # 输出索引为430294的列的值
# value = pd.DataFrame(df.loc[430294, :])
# value

In [32]:
# # Show the rows whose 'ID' value is 3829, 3830, 29427 or 29426
# result_df = df.loc[df['ID'].isin([3829, 3830, 29427, 29426]), :]

# # result_df now holds the rows with ID 3829, 3830, 29427 and 29426
# print(result_df)

In [35]:
# Rows labelled 430293 and onward (the 430294th row onward) hold the point
# topology records; take an independent copy of that tail.
topology_df_copy = df.loc[430293:].copy()
topology_df_copy

Unnamed: 0,ID,X,Y,H,uc,vc
430293,0,1 3829 29426 3857,,,,
430294,0,3829 3830 29427 29426,,,,
430295,0,3830 3831 29428 29427,,,,
430296,0,3831 3832 29429 29428,,,,
430297,0,3832 3833 29430 29429,,,,
...,...,...,...,...,...,...
871594,0,426588 426587 426256 426257,,,,
871595,0,426586 426585 426193 426194,,,,
871596,0,426580 426579 426578 426581,,,,
871597,0,426577 426576 426267 426268,,,,


In [36]:
# Split each 'X' string on runs of whitespace into separate columns, convert
# them to numbers and label the first four columns ID1..ID4.
# The pattern is a raw string: in a plain literal '\s' is an invalid escape
# sequence, which Python reports with a warning (SyntaxWarning on 3.12+).
topology_df = (
    topology_df_copy['X']
    .str.split(r'\s+', expand=True)
    .apply(pd.to_numeric)
    .rename(columns={0: 'ID1', 1: 'ID2', 2: 'ID3', 3: 'ID4'})
)
topology_df

Unnamed: 0,ID1,ID2,ID3,ID4
430293,1,3829,29426,3857
430294,3829,3830,29427,29426
430295,3830,3831,29428,29427
430296,3831,3832,29429,29428
430297,3832,3833,29430,29429
...,...,...,...,...
871594,426588,426587,426256,426257
871595,426586,426585,426193,426194
871596,426580,426579,426578,426581
871597,426577,426576,426267,426268


In [None]:
# topology_df.dtypes

In [None]:
# # 输出索引为871594的df['X']列的值
# df.at[871594, 'X']
# # print(f"The value of 'X' at index 871594 is: {x_value}")

In [None]:
# column_type = df['X'].dtype
# print(f"The data type of the 'X' column is: {column_type}")

In [None]:
# unique_types = df['X'].apply(type).unique()
# print(f"Unique data types in the 'X' column: {unique_types}")

In [None]:
# # 寻找包含特定字符串的行
# target_string = '          1       3829      29426       3857'
# condition = df['X'].astype(str).str.contains(target_string, regex=False)

# # 找到满足条件的行的索引
# target_index = df.index[condition]

# # 获取满足条件的最小索引（即第一个满足条件的行），然后计算行数
# if not target_index.empty:
#     first_target_index = target_index.min()
#     # 如果我们想要到达目标字符串的前一行，我们需要减去1
#     number_of_rows_until_target = first_target_index
#     print(f"Number of rows from the first row to the row before the target string: {number_of_rows_until_target}")
# else:
#     print("The target string was not found in the 'X' column.")

In [None]:
# len(df)

In [None]:
# # 假设坐标点数据和连接信息的区分是：坐标点全部是浮点数，连接信息全部是整数
# # 使用 try-except 结构，尝试将第一列转换为整数，如果失败则这行属于坐标点数据
# def is_float(value):
#     try:
#         float(value)
#         return True
#     except ValueError:
#         return False

# # 应用函数来检查每行是否为浮点数
# is_float_mask = df.applymap(is_float).all(axis=1)

# # 统计上半部分（坐标点数据）的行数
# number_of_coordinates = is_float_mask.sum()
# print(f"Number of coordinate points: {number_of_coordinates}")

In [None]:
# # 定义一个函数来检查值是否为整数
# def is_integer(value):
#     try:
#         int(value)
#         return True
#     except ValueError:
#         return False

# # 应用函数来检查每行是否为整数
# is_integer_mask = df.applymap(is_integer).all(axis=1)

# # 统计为整数的行数
# number_of_integer_rows = is_integer_mask.sum()
# print(f"Number of rows with only integer values: {number_of_integer_rows}")

In [None]:
# df = df.dropna(how='all')