In [None]:
# Standard library
import gzip
import os
import shutil
import tarfile
from datetime import datetime, timedelta
from pathlib import Path

# Third-party (relative order kept as in the original cell)
import requests
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from bs4 import BeautifulSoup
from PIL import Image
import rasterio

In [None]:
# Base folder of the DWD data on the local machine. The absolute paths that are
# hard-coded in the cells below all sit underneath this folder; the variable
# itself is not referenced by name in the cells visible here.
directory_path = "/home/arman_abouali/Downloads/DWD"

In [None]:
# Load the windowed image table. Each row has a 'Key' timestamp string and a
# 'Value' entry that is iterated row by row further down.
parquet_df = (
    pd.read_parquet("/home/arman_abouali/Downloads/DWD/Original_files/DWD_window/X_data_window.parquet")
    # Ordering happens while 'Key' is still a string; it is parsed right after.
    .sort_values(by='Key', ascending=True)
)
parquet_df['Key'] = pd.to_datetime(parquet_df['Key'], format='%Y%m%d_%H%M')


def _grid_total(grid):
    """Add up every number of a nested (row-by-row) grid."""
    return sum(sum(cells) for cells in grid)


# One scalar per row: the total over all cells of that row's 'Value' grid.
parquet_df['Image_Sum'] = parquet_df['Value'].apply(_grid_total)
parquet_df = parquet_df.reset_index(drop=True)

In [None]:
# Put the image data on a regular 15-minute time axis.
#
# The index is built from the real 'Key' timestamps and then reindexed onto the
# expected grid, so a missing timestamp becomes a NaN row and a timestamp
# outside the grid is dropped. Assigning the date range positionally
# (set_index(parquet_idx)) only works when the frame has exactly one row per
# grid step: otherwise it raises on the length mismatch or - if gaps and
# duplicates happen to cancel out - silently labels rows with the wrong time,
# and a reindex onto the same range afterwards cannot repair that.
parquet_idx = pd.date_range("2003-11-01 00:00:00", "2017-12-31 23:45:00", freq="15min")

# Guarded so the cell can be re-run after 'Key' has already been consumed.
if 'Key' in parquet_df.columns:
    parquet_df = (
        parquet_df
        # reindex() rejects duplicate labels; keep the first row per timestamp.
        .drop_duplicates(subset='Key', keep='first')
        .set_index('Key')
    )

# rename_axis(None) leaves the index unnamed, as with the positional index.
parquet_df = parquet_df.reindex(parquet_idx).rename_axis(None)

In [None]:
# Read the semicolon-separated sensor export.
input_df = pd.read_csv('/home/arman_abouali/Downloads/DWD/input.csv', sep=';')

# Fuse the 'Datum' and 'Zeit' strings into one datetime column named 'Zeit'.
# "24:00:00" is rewritten to "00:00:00" first because %H only accepts 00-23.
# NOTE(review): the date part is not touched by that rewrite, so such a row
# lands at 00:00 of the same 'Datum' - confirm this is the intended convention.
clock_text = input_df['Zeit'].replace("24:00:00", "00:00:00")
input_df['Zeit'] = pd.to_datetime(input_df['Datum'] + ' ' + clock_text, format='%d.%m.%Y %H:%M:%S')
input_df = (
    input_df
    .drop(columns='Datum')
    .sort_values(by='Zeit', ascending=True)
    .reset_index(drop=True)
)

# Row-wise sum of the four sensor columns, added left to right.
sensor_columns = [
    'GranetalsperreMin15Niederschlag',
    'HahnenkleeMin15Niederschlag',
    'Niederschlag_Gosequelle',
    'Niederschlag_Abzuchtquelle',
]
sensor_total = input_df[sensor_columns[0]]
for column_name in sensor_columns[1:]:
    sensor_total = sensor_total + input_df[column_name]
input_df['Sensor_Sum'] = sensor_total
input_df


In [None]:
# Put the sensor data on a regular 15-minute time axis.
#
# The index is built from the parsed 'Zeit' timestamps and then reindexed onto
# the expected grid: missing timestamps become NaN rows, timestamps outside the
# grid are dropped. Assigning the date range positionally (set_index(idx))
# would raise whenever the row count differs from the grid length and could
# silently mislabel rows when gaps and duplicates cancel out.
idx = pd.date_range("2003-11-01 00:00:00", "2018-06-30 23:45:00", freq="15min")

# Guarded so the cell can be re-run after 'Zeit' has been dropped.
if 'Zeit' in input_df.columns:
    input_df = (
        input_df
        # reindex() rejects duplicate labels; keep the first row per timestamp.
        .drop_duplicates(subset='Zeit', keep='first')
        # drop=False: the 'Zeit' column is removed by a later cell.
        .set_index('Zeit', drop=False)
    )

# rename_axis(None) leaves the index unnamed, as with the positional index.
input_df = input_df.reindex(idx).rename_axis(None)

In [None]:
# The index already carries the time axis, so the 'Zeit' column is removed.
# errors='ignore' keeps the cell re-runnable: a second run would otherwise
# raise KeyError because the column is already gone.
input_df = input_df.drop(columns='Zeit', errors='ignore')

In [None]:
# Inner join on the index of both frames: only index labels that occur in
# input_df and in parquet_df are kept.
merged_df = input_df.merge(
    parquet_df,
    how='inner',
    left_index=True,
    right_index=True,
)
merged_df

In [None]:
# Write the merged table to the working directory; the index goes into the
# first CSV column.
merged_df.to_csv(path_or_buf='merged_df.csv', index=True)

In [None]:
# Index label of the row with the largest 'Image_Sum'.
image_sums = merged_df['Image_Sum']
max_image_sum_timestamp = image_sums.idxmax()
print("Timestamp with maximum 'Image_Sum':", max_image_sum_timestamp)

In [None]:
# Look up the row stored under one specific index label of merged_df.
specific_row = merged_df.loc['2017-06-26 06:15:00']

# Read the 'Image_Sum' entry of that row
specific_value = specific_row['Image_Sum']

print(specific_value)

In [None]:
# 'Sub' = 'Sensor_Sum' minus 'Image_Sum' per row; a positive value means the
# sensor sum is the larger one.
merged_df['Sub'] = merged_df['Sensor_Sum'].sub(merged_df['Image_Sum'])

# Average difference over all rows (displayed as the cell output).
mean_sub = merged_df['Sub'].mean()
mean_sub

In [None]:
# Keep the rows whose 'Image_Sum' is strictly below 'Sensor_Sum', restricted
# to the three comparison columns.
image_below_sensor = merged_df['Image_Sum'].lt(merged_df['Sensor_Sum'])
filtered_df = merged_df.loc[image_below_sensor, ['Image_Sum', 'Sensor_Sum', 'Sub']]

print(filtered_df.shape)

In [None]:
# Overlay both sums, plotted against the index of merged_df.
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(merged_df['Image_Sum'], label='Image_Sum', color='Green')
ax.plot(merged_df['Sensor_Sum'], label='Sensor_Sum', color='red')
ax.legend()
ax.set_title("Comparison between 'Image_Sum' and 'Sensor_Sum'")
ax.set_xlabel('Index')
ax.set_ylabel('Value')
plt.show()

# Number of rows where 'Image_Sum' is strictly less than 'Sensor_Sum'.
# The vectorised .sum() replaces the built-in sum(), which walked the boolean
# Series one element at a time. The variable keeps its historical name
# `count_ge` even though it counts "less than", so existing uses keep working.
count_ge = int((merged_df['Image_Sum'] < merged_df['Sensor_Sum']).sum())
print(f"The number of times 'Image_Sum' is less than 'Sensor_Sum' is {count_ge}.")

In [None]:
# Box plot of the 'Sub' column over all merged rows.
# seaborn (sns) is imported in the notebook's top import cell.
sns.boxplot(x=merged_df['Sub'])

In [None]:
# Summary statistics of 'Sub' for the rows kept in filtered_df.
# (The redundant, unused re-import of pandas was removed from this cell.)
sub_series = filtered_df['Sub']

# Calculations
mean_val = sub_series.mean()
max_val = sub_series.max()
min_val = sub_series.min()
median_val = sub_series.median()
# mode() returns a Series because several values can tie for most frequent.
mode_val = sub_series.mode()

# Display
print(f"Mean: {mean_val}")
print(f"Max: {max_val}")
print(f"Min: {min_val}")
print(f"Median: {median_val}")
print(f"Mode: {mode_val.values}")


In [None]:
# Histogram of filtered_df['Sub'] with one bin per unit, each bar annotated
# with its count. plt / np / pd come from the notebook's top import cell.
sub_values = filtered_df['Sub']

# Upper bin edge: never below 45 (the range used so far), but widened to the
# data maximum - values beyond the last edge would otherwise be left out of
# the histogram without any warning.
sub_max = sub_values.max()
upper_edge = 45 if pd.isna(sub_max) else max(45, int(np.ceil(sub_max)))
bins = list(range(0, upper_edge + 1))

# Plotting
fig, ax = plt.subplots(figsize=(35, 15))

# Getting histogram data
n, bin_edges, patches = ax.hist(sub_values, bins=bins, edgecolor='k', alpha=0.7)

# Adding frequency annotations, centred on each unit-wide bin.
# bin_edges has one entry more than n; zip stops at the shorter one.
for left_edge, bin_count in zip(bin_edges, n):
    ax.text(left_edge + 0.5, bin_count, str(int(bin_count)), ha='center', fontsize=16)

ax.set_title('Histogram of "Sub" Values', fontsize=20)
ax.set_xlabel('Sub Value Range', fontsize=20)
ax.set_ylabel('Frequency', fontsize=20)

# One tick per bin edge for the usual 0..45 range; thinned out when the range
# had to be widened so the labels stay readable.
tick_step = max(1, len(bins) // 46)
ax.set_xticks(bins[::tick_step])
ax.tick_params(axis='both', labelsize=20)
ax.grid(True, which='both', linestyle='--', linewidth=0.5)
fig.savefig('your_plot.png')
plt.show()



In [None]:
# Location of the .asc raster to display.
path = "/home/arman_abouali/Downloads/DWD/YW2017.002_201708_asc/YW2017.002_20170801_asc.tar/YW_2017.002_20170801_0020.asc"

# Read band 1 of the raster (assumes the file holds a single band).
with rasterio.open(path) as src:
    image_data = src.read(1)

# Marker positions given as (x, y) pairs, split into separate x and y lists
# because scatter() takes them as two sequences.
coordinates = [(467, 444), (467, 452), (475, 444), (475, 452)]
x_coords = [point[0] for point in coordinates]
y_coords = [point[1] for point in coordinates]

# Large canvas; the colour scale is clipped to the 0 .. 0.01 range and the
# absolute value of the raster is what gets drawn.
fig, ax = plt.subplots(figsize=(50, 50))
raster_image = ax.imshow(np.abs(image_data), vmin=0, vmax=0.01)
fig.colorbar(raster_image, ax=ax)

# Red dots on top of the image at the chosen positions.
ax.scatter(x_coords, y_coords, c='red', marker='o')

ax.set_title('ASC Image with Specific Points')
plt.show()