LS Distance Ladder



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#importing the necessary packages

In [None]:
# Location of the Gaia catalogue of Milky Way Cepheids. Per the columns used
# further down, it provides the time period, mean apparent magnitude and
# parallax of each star.
GAIA_CSV_PATH = 'C:/Users/arnav/downloads/gaia.csv'

# Load the whole catalogue into a DataFrame.
gaia_data = pd.read_csv(GAIA_CSV_PATH)

In [None]:
# Period-luminosity calibration from the Gaia Cepheids.
#
# Goal: scatter the absolute magnitude M against log10(P), overlay the
# least-squares straight line, and report its coefficients (alpha, beta).
#
# Parallax is given in milli-arcseconds, so distance [pc] = 1000 / parallax.
# Absolute magnitude follows from the distance modulus: M = m - 5*log10(d) + 5.

# Columns taken straight from the catalogue.
parallax = gaia_data['parallax']
m = gaia_data['mean apparent magnitude']

# Distance to each star in parsecs.
distance = 1000 / parallax

# Absolute magnitude and log-period, i.e. the two axes of the relation.
abs_m = m - 5 * np.log10(distance) + 5
log_p = np.log10(gaia_data['time period'])

# Degree-1 least-squares fit; v holds [slope, intercept] and p evaluates
# the fitted line at any log-period.
v = np.polyfit(log_p, abs_m, 1)
p = np.poly1d(v)

# Best-fit line first, then the individual stars on top of it.
ax = plt.gca()
ax.plot(log_p, p(log_p), color="red")
ax.scatter(log_p, abs_m)
ax.set_xlabel('log_10(P (in days))')
ax.set_ylabel('Absolute Magnitude (M)')
plt.show()

# Fitted coefficients of the relation.
print(v)


Getting the distances to the galaxies

In [None]:
# Location of the table of Cepheids observed in other galaxies. Per the
# columns used further down, it provides Galaxy_ID, Period and
# Apparent Magnitude for each star.
CEPHEID_CSV_PATH = 'C:/Users/arnav/downloads/cepheid.csv'

# Load the whole table into a DataFrame.
cepheid_data = pd.read_csv(CEPHEID_CSV_PATH)

In [None]:
# Estimate the distance to every Cepheid in `cepheid_data` using the
# period-luminosity fit `p` from the calibration cell, then group the
# results by host galaxy.
#
# Distance modulus: m - M = 5 * log10(d) - 5  =>  d = 10 ** ((m - M + 5) / 5)
# with d in parsecs.
#
# Produces:
#   distances            - 1-D float array of all plausible Cepheid distances
#   galaxy_distance      - {Galaxy_ID: float array of plausible distances}
#   galaxy_mean_distance - {Galaxy_ID: mean of that galaxy's plausible distances}

# Distances outside this open interval (in parsecs) are treated as outliers.
MIN_PLAUSIBLE_DISTANCE_PC = 100000
MAX_PLAUSIBLE_DISTANCE_PC = 2.1e7

# Work on whole columns at once instead of looping row by row.
apparent_mag = cepheid_data['Apparent Magnitude'].to_numpy(dtype=float)
period = cepheid_data['Period'].to_numpy(dtype=float)

# Absolute magnitude predicted by the fitted line for each star's period.
absolute_mag = p(np.log10(period))
distance_pc = 10 ** ((apparent_mag - absolute_mag + 5) / 5)

cepheid_distances = pd.DataFrame({
    'Galaxy_ID': cepheid_data['Galaxy_ID'].to_numpy(),
    'distance_pc': distance_pc,
})

# One outlier mask shared by the flat array AND the per-galaxy groups.
# Previously the per-galaxy filter only enforced the lower bound, so the
# galaxy means could include far outliers that the histogram excluded.
is_plausible = (
    (cepheid_distances['distance_pc'] > MIN_PLAUSIBLE_DISTANCE_PC)
    & (cepheid_distances['distance_pc'] < MAX_PLAUSIBLE_DISTANCE_PC)
)
plausible_distances = cepheid_distances[is_plausible]

distances = plausible_distances['distance_pc'].to_numpy(dtype=float)

# Galaxies whose every measurement was rejected simply do not appear here.
# sort=False keeps galaxies in order of first appearance in the table.
galaxy_distance = {
    galaxy_id: group.to_numpy(dtype=float)
    for galaxy_id, group in plausible_distances.groupby('Galaxy_ID', sort=False)['distance_pc']
}

# Mean distance to each galaxy that has at least one plausible measurement.
galaxy_mean_distance = {
    galaxy_id: np.mean(galaxy_dists)
    for galaxy_id, galaxy_dists in galaxy_distance.items()
}

# Histogram of the individual Cepheid distances that survived the cut.
fig, ax = plt.subplots()
ax.hist(distances, bins=50)
ax.set_xlabel('Distance of galaxy (Parsec)')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Galaxy Distances')
plt.show()

Note that one of the galaxies (ID PGC040505) yields a distance of only 2383 parsecs from its single data entry. This distance is physically infeasible for an external galaxy, so the entry is discarded as an outlier and nothing can be said about this galaxy's distance.