<a href="https://colab.research.google.com/github/estrickler1/RedShift/blob/main/nz40_Redshift.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Galaxies' Redshift Catalog**

The author is Ethan Strickler

Mentor is Dr. Nicholas Jacob

Idea came from https://www.cfa.harvard.edu/~dfabricant/huchra/zcat/zcom.htm

In [210]:
import pandas as pa
import plotly.express as px
import math
import numpy as np
from scipy import stats

url = 'https://raw.githubusercontent.com/nurfnick/Galaxies/main/nz40.csv'
data = pa.read_csv(url)

url2 = 'https://raw.githubusercontent.com/estrickler1/RedShift/main/CFA2.csv'
data2 = pa.read_csv(url2)

In [211]:
data = data[1:-7] #removing 
data2 = data2[1:-10]

In [212]:
data = data.iloc[:,0:8]
data2 = data2.iloc[:,0:8]

In [213]:
data

Unnamed: 0,Name,RA (1950),Dec m_zw,v_h,sig sour,Type,D1,D1.1
1,00006+2142,36,21420014.4,6605,3212700,5P,1,0.7
2,00006-0211,36,-2110014.3,7323,3112700,4X,1.3,0.8
3,N7814,41.1,15520312,1050,410611,2A s,6.5,2.7
4,00010+2256,100,22560014,7301,3412700,5,1.9,1
5,N7816,112,7110014,5241,510620,4,2,2
...,...,...,...,...,...,...,...,...
2391,N7805,235852.7,31092014.3,4948,2112700,-2X P,1,0.7
2392,N7806,235854,31100014.4,4827,2212700,4X P,1.1,0.8
2393,23591+2313,235906,23130013.2,4371,810620,6 R,2.7,1.3
2394,23592+2314,235912,23140013.9,4336,710620,7B P,1.6,0.5


This is the galaxy N7814 listed in row 3.

![N7814](https://drive.google.com/uc?export=view&id=1UTecn2UdNCUwyvEsOhz8VOHG21_YHr6b)

This is the galaxy N7806 listed in row 2392.

![N7806](https://drive.google.com/uc?export=view&id=1pRUJsWgqbbQBMKtDzP-Xk4IBMDNxKlZV)

Combine the different datasets into one table.

In [214]:
diction ={}
for i in range(8):
  diction[data2.columns[i]] = data.columns[i]
diction
data2 = data2.rename(columns = diction)

In [215]:
frames = [data,data2]

data = pa.concat(frames, ignore_index= True)


In [216]:
data[data['v_h'] == '1 5261']

Unnamed: 0,Name,RA (1950),Dec m_zw,v_h,sig sour,Type,D1,D1.1
64,N 182,3536.0,2270013.5,1 5261,600620,1X T,2.3,1.8
2758,N 182,3538.2,2271513.5,1 5261,600620,1X T,2.3,1.8


In [217]:
data = data[~data.v_h.isnull()]
data.iloc[2300:2400,:]

Unnamed: 0,Name,RA (1950),Dec m_zw,v_h,sig sour,Type,D1,D1.1
2300,N7591,231543.9,6184513.8,4953,1610611,3B,1.9,0.8
2301,23162+2457,231612,24570014,8081,3212700,1B,1,0.8
2302,N7603,231622.7,-14814.4,8826,3412700,3A T,1.4,0.9
2303,I1481,231654,5380014.5,6118,3012707,8P,0.9,0.8
2304,23171+0112,231706,1120014.5,9006,4012710,6,0.9,0.4
...,...,...,...,...,...,...,...,...
2395,I5378S,4,16215415.2,6387,23H2700,,1.5,1.3
2396,00002+1837,12,18363314.8,7882,510650,1B,2,1
2397,00005+0356,23.3,3554815.5,8693,2212700,4A,1.3,0.8
2398,00005+1836,30,18360015.5,7894,2010650,,,


Deleting every row with a flaud v_h value. Then putting the values that have not been deleted back into the table.


In [218]:
badnumbers = data.v_h.loc[data['v_h'].str.match(r'[0-9.]+ ')] #starts with a number and then a space
goodnumbers = badnumbers.str.slice(2,) #get rid of that # and space

In [219]:
 greatnumbers = data.v_h.loc[~data['v_h'].str.match(r'[0-9.]+ ')] #get all the ones that didn't have the above issue

In [220]:
somenumbers = pa.concat([greatnumbers,goodnumbers]) #put them back together

In [221]:
data['v_h'] = somenumbers

In [222]:
data = data.loc[~data['v_h'].str.match(r'[A-Z]')] #removing the ones that had months...

In [223]:
data.v_h.astype(int)

0         6605
1         7323
2         1050
3         7301
4         5241
         ...  
20594    19197
20595     5532
20596     7599
20597     7650
20598     8606
Name: v_h, Length: 20580, dtype: int64

In [224]:
data['v_h'] = pa.to_numeric(data['v_h'])

In [225]:
data.describe()

Unnamed: 0,v_h
count,20580.0
mean,7010.745238
std,14315.606876
min,-571.0
25%,4219.0
50%,6571.5
75%,9032.25
max,926681.0


In [226]:
width = max(data.v_h)-min(data.v_h)

In [227]:
#data = data[(np.abs(stats.zscore(data.v_h)) < 3)]
list = [3000,7000,12000,25000,100000]
percents = []
for item in list:
  percents.append(item/width)

#Colors are red v < 3000, blue 3000< v < 7000, magenta 7000 < v < 12000, cyan 12000 < v < 25000, and green 25,000 < v < 100,000 km/s.
#data.describe()
percents

[0.003235366437602723,
 0.007549188354406353,
 0.012941465750410892,
 0.026961386980022692,
 0.10784554792009077]

In [228]:
data = data.loc[~data['Dec m_zw'].str.match(r'[0-9.]+ ')]
data = data[~(data['Dec m_zw'] == '-001331 9.7')]
decData = data["Dec m_zw"]

Take the given RA (1950) value, which is listed in hours, minutes, and seconds (HHMMSS), then convert it into degrees. One day is equal to 360 degrees.

In [229]:
raData = data["RA (1950)"]

In [230]:
raData = raData.astype(float)
raInDegrees = []

for point in raData:
  x = math.floor(point/10000)
  y = math.floor((point - x*10000)/100)
  z = point - x*10000-y*100
  decimal = x + y/60 + z/3600
  degrees = decimal *360/24
  raInDegrees.append(degrees)

raInDegrees

data["RADegrees"] = raInDegrees 


In [231]:
point = float(raData[17])  
x = math.floor(point/10000)
y = math.floor((point - x*10000)/100)
z = point - x*10000-y*100

print(point, x , y, z)

754.0 0 7 54.0


In [232]:
data["Dec m_zw"]

0        21420014.4
1        -2110014.3
2          15520312
3          22560014
4           7110014
            ...    
20594     2400015.1
20595    12413714.3
20596    26554315.5
20597     3042614.9
20598     3480015.4
Name: Dec m_zw, Length: 20546, dtype: object

Take the given Dec m_zw data, which is given in degrees, minutes, seconds, and milliseconds (DDDMMSSmm), then convert it to degrees.

In [233]:
decData = decData.astype(float)

decInDegrees = []

for point in decData:
  if point>0:
    w = math.floor(point/1000000)
    x = math.floor((point - w*1000000)/10000)
    y = math.floor((point - w*1000000-x*10000)/100)
    z = point - w*1000000 - x*10000 - y*100
    decimal = x/60 + y/3600 + z/(3.6*10**6)
    degrees = decimal *360/24 + w
    decInDegrees.append(degrees)
  else:
    point = abs(point)
    w = math.floor(point/1000000)
    x = math.floor((point - w*1000000)/10000)
    y = math.floor((point - w*1000000-x*10000)/100)
    z = point - w*1000000 - x*10000 - y*100
    decimal = x/60 + y/3600 + z/(3.6*10**6)
    degrees = decimal *360/24 + w
    decInDegrees.append(-degrees)

data["DecDegrees"] = decInDegrees

In [234]:
new =[]
newRA =[]
for i in range(len(decInDegrees)):
  if decInDegrees[i] < 0:
    new.append(abs(decInDegrees[i]))
    if raInDegrees[i] <= 180:
      newRA.append(raInDegrees[i] + 180)
    else:
      newRA.append(raInDegrees[i] - 180)
  else:
    new.append(decInDegrees[i])
    newRA.append(raInDegrees[i])

data["NewDecDegrees"] = new
data["NewRA"] = newRA

In [235]:
data.describe()

Unnamed: 0,v_h,RADegrees,DecDegrees,NewDecDegrees,NewRA
count,20546.0,20546.0,20546.0,20546.0,20546.0
mean,7021.585953,174.642789,33.561132,33.912902,176.281064
std,14324.956335,87.023856,21.01151,20.438861,84.106015
min,-571.0,0.016667,-15.929221,6.2e-05,0.016667
25%,4229.25,127.865521,17.250057,17.250057,132.768229
50%,6576.0,180.211042,30.408391,30.408391,181.415417
75%,9040.75,225.941042,47.383398,47.383398,225.139583
max,926681.0,359.975,98.979229,98.979229,359.975


In [236]:
fig = px.scatter_polar(data, r="DecDegrees", theta="RADegrees", color = "v_h",color_continuous_scale="blackbody")
fig

In [237]:
fig = px.scatter_polar(data, r="NewDecDegrees", theta="NewRA", color = "v_h",color_continuous_scale="blackbody",range_color=[0,20000])
fig

#Colors are red v < 3000, blue 3000< v < 7000, magenta 7000 < v < 12000, cyan 12000 < v < 25000, and green 25,000 < v < 100,000 km/s.
#data.describe()
percents

In [246]:
fig = px.scatter_polar(data, r="NewDecDegrees", theta="NewRA", color = "v_h", color_continuous_scale=[(0.00, "red"),   (.03, "red"),
                                                     (.03, "blue"), (.07, "blue"),
                                                     (.07, "magenta"), (.12, "magenta"),
                                                     (.12, "cyan"),(.25,"cyan"),
                                                     (.25, "green"),  (1, "green")],range_color=[0,100000]  )
fig
        

In [240]:
percents

[0.003235366437602723,
 0.007549188354406353,
 0.012941465750410892,
 0.026961386980022692,
 0.10784554792009077]

In [245]:
fig = px.scatter_polar(data, r="NewDecDegrees", theta="NewRA", color = "v_h", color_continuous_scale=[(0.00, "red"),   (percents[0], "red"),
                                                     (percents[0], "blue"), (percents[1], "blue"),
                                                     (percents[1], "magenta"), (percents[2], "magenta"),
                                                     (percents[2], "cyan"),(percents[3],"cyan"),
                                                     (percents[3], "green"),  (percents[4], "green"),
                                                     (percents[4],"black"),(1,"black")]  )
fig
                  

![ZCAT 2000](https://drive.google.com/uc?export=view&id=1xnuRUcgVl7UxQ1y6N3GP8EvV9U5IQ7xF)