In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read input data.
# All inputs are addressed relative to the working directory; the temperature
# file previously used an absolute '/Salmon_project/...' path, unlike every
# other read in this notebook.
df_date_temperature = pd.read_csv('Salmon_project/data/FW_temperature_filled.csv')
df_date_cycles = pd.read_csv('Salmon_project/data/FW_cycle_dates.csv')
df_strain_season = pd.read_csv('Salmon_project/data/smolt_dataset.csv')

# Keep the dtypes as parsed from the CSVs for inspection
datatypes_1 = df_date_temperature.dtypes
datatypes_2 = df_date_cycles.dtypes

# Drop temperature rows that have a missing value in any column
df_date_temperature = df_date_temperature.dropna()

# Convert date columns to datetime so they can be compared and subtracted
df_date_temperature['event_date'] = pd.to_datetime(df_date_temperature['event_date'])
cycle_date_columns = ['first_movement_date', 'first_feeding_date', 'shipout_date']
df_date_cycles[cycle_date_columns] = df_date_cycles[cycle_date_columns].apply(pd.to_datetime)

# Add the (initially empty, NaN-filled) result columns to df_date_cycles
new_cycle_columns = ['days_in_fresh_water', 'average_tempr', 'median_tempr', 'strain', 'season']
df_date_cycles = df_date_cycles.reindex(columns=df_date_cycles.columns.tolist() + new_cycle_columns)

# Days in fresh water: whole days between first movement and shipout.
# Computed column-wise so a missing date yields NaN instead of the ValueError
# that int(NaN) raises; np.trunc keeps the truncate-toward-zero rounding of int().
time_in_fresh_water = df_date_cycles['shipout_date'] - df_date_cycles['first_movement_date']
df_date_cycles['days_in_fresh_water'] = np.trunc(time_in_fresh_water / np.timedelta64(1, 'D'))

# Mean and median temperature per cycle, taken over the temperature records of
# the cycle's population between first feeding and shipout (both inclusive).
# Cycles with no matching records get NaN.
average_temprs = []
median_temprs = []
for part_number, date_start_feeding, date_shipout in zip(
        df_date_cycles['pretransfer_fw_locus_population_id'],
        df_date_cycles['first_feeding_date'],
        df_date_cycles['shipout_date']):
    filtered_df = df_date_temperature[
        (df_date_temperature['event_date'] >= date_start_feeding) &
        (df_date_temperature['event_date'] <= date_shipout) &
        (df_date_temperature['final_locus_population_id'] == part_number)
    ]
    average_temprs.append(filtered_df['temperature'].mean())
    median_temprs.append(filtered_df['temperature'].median())

# Lists are assigned positionally, so this does not depend on the row labels
df_date_cycles['average_tempr'] = average_temprs
df_date_cycles['median_tempr'] = median_temprs

# TODO: the 'strain' and 'season' columns of df_date_cycles are not filled in
# here. The earlier draft looked up df_strain_season rows whose
# 'from_locus_population_id' equals the cycle's population id but never used
# the result, so that unused per-row lookup was removed.


In [None]:
# Read the CSV file into the dataframe
df_date_temperature = pd.read_csv('Salmon_project/data/FW_temperature_filled_test_2.csv')

# Drop rows with missing values
df_date_temperature = df_date_temperature.dropna()

# Unique 'final_locus_population_id' values, in order of first appearance
unic_part_number_list = df_date_temperature['final_locus_population_id'].unique().tolist()

# Window length of the rolling mean. The same number of leading rows of every
# population has its rolling value overwritten below with a baseline derived
# from the mean of those leading rows.
ROLLING_WINDOW = 30

# One processed frame per population; concatenated once after the loop
# (DataFrame.append no longer exists in pandas 2.x and was quadratic anyway).
cleared_parts = []
population_groups = df_date_temperature.groupby('final_locus_population_id', sort=False)
for group_number, (_, population_rows) in enumerate(population_groups, start=1):
    # reset_index returns a new frame, so the assignments below never write
    # into a slice of df_date_temperature
    df_temperature_filtered = population_rows.reset_index(drop=True)

    # Rolling average of the 'temperature' column
    df_temperature_filtered['rolling_tempr'] = (
        df_temperature_filtered['temperature'].rolling(ROLLING_WINDOW).mean()
    )

    # Mean of the leading rows and half of it (the tolerance)
    leading_temperatures = df_temperature_filtered['temperature'].head(ROLLING_WINDOW)
    average_tempr_30 = leading_temperatures.mean()
    half_average_tempr_30 = average_tempr_30 / 2

    # Leading rows: keep the reading when it is within the tolerance of the
    # leading mean, otherwise fall back to the leading mean itself
    within_tolerance = (leading_temperatures - average_tempr_30).abs() < half_average_tempr_30
    df_temperature_filtered.loc[leading_temperatures.index, 'rolling_tempr'] = (
        leading_temperatures.where(within_tolerance, average_tempr_30)
    )

    cleared_parts.append(df_temperature_filtered)

    # Print the number of populations processed so far
    print('n', group_number)

if cleared_parts:
    # ignore_index gives every row a unique label; without it each population
    # would restart at 0 and label-based writes would hit several rows at once
    df_temperature_cleared = pd.concat(cleared_parts, ignore_index=True)
    df_temperature_cleared['temperature_cleared'] = np.nan
else:
    # No input rows: keep the empty frame the original cell started from
    df_temperature_cleared = pd.DataFrame(
        columns=['final_locus_population_id', 'event_date', 'temperature_cleared'])

# Process temperature clearing.
# A reading is kept when it differs from the rolling baseline by less than half
# of that baseline; otherwise it is replaced by the baseline.
# The arrays are taken and written back positionally, so the result is correct
# even when df_temperature_cleared carries repeated row labels (mixing
# positional reads with label-based writes would then modify several rows per
# step and append spurious rows).
if len(df_temperature_cleared) > 0:
    raw_temperatures = df_temperature_cleared['temperature'].to_numpy()
    rolling_baseline = df_temperature_cleared['rolling_tempr'].to_numpy()

    within_tolerance = np.abs(raw_temperatures - rolling_baseline) < rolling_baseline / 2
    df_temperature_cleared['temperature_cleared'] = np.where(
        within_tolerance, raw_temperatures, rolling_baseline)

In [None]:
# Read seawater temperature data
df_seawater_temperature = pd.read_csv('Salmon_project/data/seawater_temperature_for_transfers_since_2017_only_first_90days.csv')

# Add new column 'key_param': '<locus_id>/<transfer_date>' for every row.
# Built column-wise rather than by writing strings cell by cell into a
# NaN-filled float column.
df_seawater_temperature['key_param'] = (
    df_seawater_temperature['locus_id'].astype(str)
    + '/'
    + df_seawater_temperature['transfer_date'].astype(str)
)

# Create an empty dataframe to store the final results
df_sw_tempr_final = pd.DataFrame(columns=['locus_id', 'transfer_date', 'to_date', 'event_date', 'average_tempr', 'min_tempr', 'max_tempr'])

# Unique keys, in order of first appearance
unic_keys_list = df_seawater_temperature['key_param'].unique().tolist()

# Summarise the seawater temperature records of each unique key
for m, key in enumerate(unic_keys_list, start=1):
    # Rows sharing this key
    df_seawater_tempr_filtered = df_seawater_temperature[df_seawater_temperature['key_param'] == key]

    # Take locus_id from the first row of the group by position. The loop
    # counter is not a row label of the filtered frame, so looking it up with
    # .at raised KeyError for every group that does not contain that label.
    locus_id = df_seawater_tempr_filtered['locus_id'].iloc[0]
    average_tempr = df_seawater_tempr_filtered['temperature'].mean()

    # TODO: transfer_date, to_date, event_date, min_tempr and max_tempr are not
    # computed yet, and nothing is written to df_sw_tempr_final.

    # Print the number of keys processed so far
    print(m)
