In [None]:
import pandas as pd
from math import radians, sin, cos, asin, sqrt

In [None]:
import pandas as pd
def haversine(lat1, lon1, lat2, lon2, radius=6371):
    """
    Calculates the great-circle distance between two points on a sphere
    (using the Haversine formula).

    Args:
      lat1: Latitude of the first point in degrees.
      lon1: Longitude of the first point in degrees.
      lat2: Latitude of the second point in degrees.
      lon2: Longitude of the second point in degrees.
      radius: Radius of the sphere. Defaults to 6371, Earth's radius in
        kilometers; the result is expressed in the same unit as the radius.

    Returns:
      The distance between the two points, in the unit of ``radius``
      (kilometers with the default).
    """
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Rounding error can leave `a` marginally above 1 for (nearly) antipodal
    # points, which would make asin() raise "math domain error". An explicit
    # comparison is used instead of min() so that NaN inputs still yield NaN.
    if a > 1.0:
        a = 1.0

    c = 2 * asin(sqrt(a))

    return radius * c


class Star:
    """Record for one star: its HR number, latitude, longitude and V magnitude."""

    def __init__(self, hr, lat, lon, vMag):
        # Store the four constructor arguments unchanged under the same names.
        self.hr, self.lat, self.lon, self.vMag = hr, lat, lon, vMag

class Constellation(Star):
    """Pair of stars whose mutual distance can be computed.

    The class derives from Star, but its constructor does not call
    Star.__init__, so an instance only carries ``star1`` and ``star2``
    (no ``hr``/``lat``/``lon``/``vMag`` of its own).
    """

    def __init__(self, star1, star2):
        self.star1, self.star2 = star1, star2

    def distance(self):
        """Return haversine() evaluated on the lat/lon of the two stars."""
        first, second = self.star1, self.star2
        return haversine(first.lat, first.lon, second.lat, second.lon)

def read_csv_to_dataframe(file_path):
    """
    Load a CSV file into a pandas dataframe.

    The file is handed to ``pd.read_csv`` with default options; no column
    checks or conversions are performed here.

    Args:
        file_path: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        The pandas dataframe produced by ``pd.read_csv``.
    """
    return pd.read_csv(file_path)


def calculate_distances(df):
    """
    Calculates the distances between all ordered pairs of stars in the dataframe.

    Every star is paired with every star (including itself, and in both
    orders), so an input of n rows yields n * n result rows. Each distance is
    printed as it is computed.

    Args:
        df: The pandas dataframe containing star data. The columns 'HR',
            'GLAT', 'GLON_shifted' and 'Vmag' are read. Rows are taken by
            position, so the dataframe may carry any index.

    Returns:
        A new pandas dataframe with the columns 'Star1 HR', 'Star2 HR' and
        'Distance' (the value returned by Constellation.distance()). The
        columns are present even when the input has no rows.
    """
    # Build each Star once, iterating the columns positionally instead of
    # looking rows up with df.loc[i, ...], which only works on a 0..n-1 index.
    stars = [
        Star(hr, lat, lon, vmag)
        for hr, lat, lon, vmag in zip(df['HR'], df['GLAT'], df['GLON_shifted'], df['Vmag'])
    ]

    distances = []
    for star1 in stars:
        for star2 in stars:
            distance = Constellation(star1, star2).distance()
            distances.append({'Star1 HR': star1.hr, 'Star2 HR': star2.hr, 'Distance': distance})
            print('Distance between HR', star1.hr, 'and HR', star2.hr, 'is', distance, 'km')

    # Explicit columns keep the schema stable for an empty input, so callers
    # can still group by 'Star1 HR'.
    return pd.DataFrame(distances, columns=['Star1 HR', 'Star2 HR', 'Distance'])






In [None]:
# Example usage of haversine() on a single pair of coordinates:
# lat1 = -16.88
# lon1 = 65.56
# lat2 = -66.38
# lon2 = -141.61

# distance_in_km = haversine(lat1, lon1, lat2, lon2)
# print(f"The distance between the two points is {distance_in_km:.2f} kilometers.")


# Load the star table from disk, then compute every pairwise distance.
star_table = read_csv_to_dataframe('star_formated_raw_short.csv')
df = calculate_distances(star_table)



In [None]:
# Bucket every pairwise-distance row by the HR number of its first star
grouped_df = df.groupby('Star1 HR')

# Within each bucket keep only rows whose Distance is greater than 0,
# then take the 5 rows with the smallest Distance
shortest_distances = grouped_df.apply(
    lambda pairs: pairs.loc[pairs['Distance'] > 0].nsmallest(5, 'Distance')
)

# Render the result in the notebook
display(shortest_distances)

# NOTE(review): bare integer literals with no effect beyond the notebook
# echoing the last one; 10093 is the modulus used when hashing the distances
# further down -- presumably these are candidate moduli, confirm.
10093
9293

In [None]:
# One record per star: the sum of the first 5 'Distance' values of its group,
# reduced modulo 10093.
# NOTE(review): head(5) takes the first five rows in group order (not the five
# smallest) and does not skip zero distances -- confirm this is intended.
hash_df = [
    {'Star HR': star_hr, 'Hash': star_rows['Distance'].head(5).sum() % 10093}
    for star_hr, star_rows in grouped_df
]

display(hash_df)



