<a href="https://colab.research.google.com/github/estrickler1/RedShift/blob/main/nz40_Redshift.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Galaxies' Redshift Catalog**

The author is Ethan Strickler

Mentor is Dr. Nicholas Jacob

Idea came from https://www.cfa.harvard.edu/~dfabricant/huchra/zcat/zcom.htm

![RedShiftExample](https://drive.google.com/uc?export=view&id=12KXKzdGP4ieRVxB6JJigNrFOt50neUzI)

In [64]:
import pandas as pa
import plotly.express as px
import math
import numpy as np
from scipy import stats

url = 'https://raw.githubusercontent.com/nurfnick/Galaxies/main/nz40.csv'
data = pa.read_csv(url)

url2 = 'https://raw.githubusercontent.com/estrickler1/RedShift/main/CFA2.csv'
data2 = pa.read_csv(url2)

url3 = 'https://raw.githubusercontent.com/estrickler1/RedShift/main/zwicky.csv'
zwicky = pa.read_csv(url3)

In [65]:
data = data[1:-7] #removing 
data2 = data2[1:-10]

In [66]:
data = data.iloc[:,0:8]
data2 = data2.iloc[:,0:8]

In [67]:
data

Unnamed: 0,Name,RA (1950),Dec m_zw,v_h,sig sour,Type,D1,D1.1
1,00006+2142,36,21420014.4,6605,3212700,5P,1,0.7
2,00006-0211,36,-2110014.3,7323,3112700,4X,1.3,0.8
3,N7814,41.1,15520312,1050,410611,2A s,6.5,2.7
4,00010+2256,100,22560014,7301,3412700,5,1.9,1
5,N7816,112,7110014,5241,510620,4,2,2
...,...,...,...,...,...,...,...,...
2391,N7805,235852.7,31092014.3,4948,2112700,-2X P,1,0.7
2392,N7806,235854,31100014.4,4827,2212700,4X P,1.1,0.8
2393,23591+2313,235906,23130013.2,4371,810620,6 R,2.7,1.3
2394,23592+2314,235912,23140013.9,4336,710620,7B P,1.6,0.5


This is the galaxy N7814 listed in row 3.

![N7814](https://drive.google.com/uc?export=view&id=1UTecn2UdNCUwyvEsOhz8VOHG21_YHr6b)

This is the galaxy N7806 listed in row 2392.

![N7806](https://drive.google.com/uc?export=view&id=1pRUJsWgqbbQBMKtDzP-Xk4IBMDNxKlZV)

Combine the different datasets into one table.

In [68]:
diction ={}
for i in range(8):
  diction[data2.columns[i]] = data.columns[i]
diction
data2 = data2.rename(columns = diction)

In [69]:
frames = [data,data2]

data = pa.concat(frames, ignore_index= True)


In [70]:
zwicky.columns

Index(['ra2000  ', 'Dec-00', 'Zmag', 'cz   ', 'cze', 'T', 'U', 'Ne',
       'Zname       ', 'C', 'Ref ', 'Oname      ', 'M', 'N'],
      dtype='object')

In [71]:
data.columns

Index(['Name', 'RA (1950)', 'Dec m_zw', 'v_h', 'sig sour', 'Type', 'D1',
       'D1.1'],
      dtype='object')

In [72]:
diction2 ={'ra2000  ':'RA (1950)', 'Dec-00':'Dec m_zw', 'cz   ':'v_h', 'Oname      ':'Name'}

zwicky = zwicky.rename(columns = diction2)
zwicky.shape

(19367, 14)

In [73]:
frames = [data, zwicky]

data = pa.concat(frames, ignore_index= True)

In [74]:
data[data['v_h'] == '1 5261']

Unnamed: 0,Name,RA (1950),Dec m_zw,v_h,sig sour,Type,D1,D1.1,Zmag,cze,T,U,Ne,Zname,C,Ref,M,N
64,N 182,3536.0,2270013.5,1 5261,600620,1X T,2.3,1.8,,,,,,,,,,
2758,N 182,3538.2,2271513.5,1 5261,600620,1X T,2.3,1.8,,,,,,,,,,


In [75]:
data = data[~data.v_h.isnull()]


Deleting every row with a flawed $v_h$ value. Then putting the values that have not been deleted back into the table.


In [76]:
badnumbers = data.v_h.loc[data['v_h'].str.match(r'[0-9.]+ ')] #starts with a number and then a space
goodnumbers = badnumbers.str.slice(2,) #get rid of that # and space

In [77]:
 greatnumbers = data.v_h.loc[~data['v_h'].str.match(r'[0-9.]+ ')] #get all the ones that didn't have the above issue

In [78]:
somenumbers = pa.concat([greatnumbers,goodnumbers]) #put them back together

In [79]:
data['v_h'] = somenumbers

In [80]:
data = data.loc[~data['v_h'].str.contains(r'[A-z]')] #removing the ones that had months...
data = data.loc[~data['v_h'].str.contains(r' ')]

In [81]:
data.v_h.astype(int)

0         6605
1         7323
2         1050
3         7301
4         5241
         ...  
39961    19201
39962     5512
39963     8590
39964     7578
39965     7649
Name: v_h, Length: 39481, dtype: int64

In [82]:
data['v_h'] = pa.to_numeric(data['v_h'])

In [83]:
data.describe()

Unnamed: 0,v_h,Zmag,U,Ne
count,39481.0,19204.0,19204.0,19204.0
mean,7181.92599,14.912602,0.55176,0.145438
std,10664.544619,0.762431,0.855416,0.445358
min,-571.0,4.3,0.0,0.0
25%,4559.0,14.7,0.0,0.0
50%,6821.0,15.2,0.0,0.0
75%,9280.0,15.4,1.0,0.0
max,926681.0,15.7,4.0,5.0


In [84]:
data = data.astype({'Dec m_zw':str})

In [85]:
#data = data.loc[~data['Dec m_zw'].str.match(r'[0-9.]+ ')]
data = data.loc[~data['Dec m_zw'].str.contains(r'[A-z]')]
data = data.loc[~data['Dec m_zw'].str.contains(r' ')]
data = data[~(data['Dec m_zw'] == '-001331 9.7')]
decData = data["Dec m_zw"]

In [86]:
data = data.reset_index()

data[190:195]

Unnamed: 0,index,Name,RA (1950),Dec m_zw,v_h,sig sour,Type,D1,D1.1,Zmag,cze,T,U,Ne,Zname,C,Ref,M,N
190,190,01300+1804,12959.7,18033314.3,686,3612812,10,1.2,0.8,,,,,,,,,,
191,191,I1715,13054.9,12194814.5,4183,3312700,10,0.7,0.5,,,,,,,,,,
192,193,01316-0117,13134.8,-1171614.4,4929,2612700,2A,1.5,0.5,,,,,,,,,,
193,194,N 606,13206.0,21100014.5,9956,3012700,5B,1.7,1.5,,,,,,,,,,
194,195,N 622,13325.6,243514.1,5187,3012700,4B T,2.1,1.7,,,,,,,,,,


Right ascension is the angular distance of a particular point measured eastward along the celestial equator from the Sun at the March equinox to the point in question above the earth.

Take the given RA (1950) value, which is listed in hours, minutes, and seconds (HHMMSS), then convert it into degrees. One day is equal to 360 degrees.

In [87]:
raData = data["RA (1950)"]

In [88]:
raData = raData.astype(float)
raInDegrees = []

for point in raData:
  x = math.floor(point/10000)
  y = math.floor((point - x*10000)/100)
  z = point - x*10000-y*100
  decimal = x + y/60 + z/3600
  degrees = decimal *360/24
  raInDegrees.append(degrees)

raInDegrees

data["RADegrees"] = raInDegrees 


Declination's angle is measured north or south of the celestial equator.

Take the given Dec m_zw data, which is given in degrees, minutes, seconds, and milliseconds (DDDMMSSmm), then convert it to degrees.

In [89]:
data["Dec m_zw"]

0        21420014.4
1        -2110014.3
2          15520312
3          22560014
4           7110014
            ...    
39460         25627
39461        125818
39462         40528
39463        271238
39464         32107
Name: Dec m_zw, Length: 39465, dtype: object

In [90]:
decData = decData.astype(float)

decInDegrees = []

for point in decData:
  if point>0:
    w = math.floor(point/1000000)
    x = math.floor((point - w*1000000)/10000)
    y = math.floor((point - w*1000000-x*10000)/100)
    z = point - w*1000000 - x*10000 - y*100
    decimal = x/60 + y/3600 + z/(3.6*10**6)
    degrees = decimal *360/24 + w
    decInDegrees.append(degrees)
  else:
    point = abs(point)
    w = math.floor(point/1000000)
    x = math.floor((point - w*1000000)/10000)
    y = math.floor((point - w*1000000-x*10000)/100)
    z = point - w*1000000 - x*10000 - y*100
    decimal = x/60 + y/3600 + z/(3.6*10**6)
    degrees = decimal *360/24 + w
    decInDegrees.append(-degrees)

data["DecDegrees"] = decInDegrees

In [91]:
new =[]
newRA =[]
for i in range(len(decInDegrees)):
  if decInDegrees[i] < 0:
    new.append(abs(decInDegrees[i]))
    if raInDegrees[i] <= 180:
      newRA.append(raInDegrees[i] + 180)
    else:
      newRA.append(raInDegrees[i] - 180)
  else:
    new.append(decInDegrees[i])
    newRA.append(raInDegrees[i])

data["NewDecDegrees"] = new
data["NewRA"] = newRA

In [92]:
data.describe()

Unnamed: 0,index,v_h,Zmag,U,Ne,RADegrees,DecDegrees,NewDecDegrees,NewRA
count,39465.0,39465.0,19204.0,19204.0,19204.0,39465.0,39465.0,39465.0,39465.0
mean,20051.582389,7184.693399,14.912602,0.55176,0.145438,174.774973,20.352184,20.557279,176.71796
std,11512.726493,10665.81596,0.762431,0.855416,0.445358,86.885083,20.521111,20.315645,86.309082
min,0.0,-571.0,4.3,0.0,0.0,0.007083,-15.929221,8e-06,0.007083
25%,10062.0,4561.0,14.7,0.0,0.0,126.5025,5.054217,5.283462,129.38625
50%,20068.0,6823.0,15.2,0.0,0.0,179.57625,12.829175,12.908397,181.315417
75%,30023.0,9280.0,15.4,1.0,0.0,227.145,31.10006,31.10006,228.2325
max,39965.0,926681.0,15.7,4.0,5.0,359.975,98.979229,98.979229,359.984167


In [93]:
fig = px.scatter_polar(data, r="NewDecDegrees", theta="NewRA", color = "v_h", color_continuous_scale=[(0.00, "red"),   (.03, "red"),
                                                     (.03, "blue"), (.07, "blue"),
                                                     (.07, "magenta"), (.12, "magenta"),
                                                     (.12, "cyan"),(.25,"cyan"),
                                                     (.25, "green"),  (1, "green")],range_color=[0,100000]  )
fig
        

In [103]:
def newtonMethodforFindingTheta(phi):
  theta = phi*np.pi/180
  for i in range(10):#checked and most in the test set converged very quickly 5 is maybe more steps than needed?
    theta = theta - (2*theta +math.sin(2*theta)-np.pi*math.sin(phi))/(2+2*math.cos(2*theta))
  return theta



In [104]:
thetas = []
for deg in data["RADegrees"]:
  thetas.append(newtonMethodforFindingTheta(deg))




In [105]:
R = 10
l = 0
xs = []
ys = []

for i,theta in enumerate(thetas):
  ls = data["DecDegrees"][i]*np.pi/180
  xs.append(R*2*math.sqrt(2)/np.pi*(ls-l)*math.cos(theta))
  ys.append(R*math.sqrt(2)*math.sin(theta))

In [106]:
data["X"] = xs
data["Y"] = ys

fig = px.scatter(data, x="X", y="Y", color = "v_h", color_continuous_scale=[(0.00, "red"),   (.03, "red"),
                                                     (.03, "blue"), (.07, "blue"),
                                                     (.07, "magenta"), (.12, "magenta"),
                                                     (.12, "cyan"),(.25,"cyan"),
                                                     (.25, "green"),  (1, "green")],range_color=[0,100000]  )
fig

![ZCAT 2000](https://drive.google.com/uc?export=view&id=1xnuRUcgVl7UxQ1y6N3GP8EvV9U5IQ7xF)

![Constellations](https://drive.google.com/uc?export=view&id=18KT8Ozqm2k8xSgXej-XoB-qYwu-YalHt)