This notebook walks through simple examples of how to trim a data catalog (i.e. make cuts) based on chosen parameters, using pandas.

In [3]:
# Import the libraries used in this notebook
import pandas as pd 
import numpy as np 

In [4]:
# The data we want to use is in the GAMA_Galaxies directory and is called 'GAMA_sample1.csv'.
# The leading `!` runs `ls` as a shell command from the notebook to list that directory.
!ls ../GAMA_Galaxies 

GAMA_sample1.csv


In [14]:
# Import data.
# The path is relative to the current working directory: '..' refers to the parent
# directory (GAMA), and everything following the slash / is followed down from there.
path = '../GAMA_Galaxies/GAMA_sample1.csv'
gama = pd.read_csv(path) 
gama

Unnamed: 0,CATAID,RA,DEC,CATAID.1,Z,nQ,SURVEY_CODE,Z_TONRY,logmstar,dellogmstar
0,6802,174.00600,0.72093,6802,0.05054,4,5,0.051814,9.076030,0.131666
1,6806,174.02279,0.70594,6806,0.33124,4,5,0.332854,10.979200,0.099166
2,6808,174.10071,0.65891,6808,0.22936,4,5,0.230851,10.791600,0.098116
3,6810,174.10908,0.80390,6810,0.32630,4,5,0.327908,11.119700,0.096136
4,6816,174.01896,0.66580,6816,0.07641,4,5,0.077715,9.012390,0.146456
...,...,...,...,...,...,...,...,...,...,...
113153,4321789,219.54175,1.20398,4321789,0.29884,4,5,0.299822,10.410600,0.131773
113154,4322179,220.18208,1.03991,4322179,0.22929,3,5,0.230207,9.689020,0.164714
113155,4327264,215.73183,1.66110,4327264,0.00006,4,5,0.000001,0.464692,0.168939
113156,4327642,222.77558,1.53559,4327642,0.00034,4,5,0.000004,1.253310,0.103429


In [15]:
# To drop columns (removing them from your working DataFrame):
drop_col = ['CATAID.1', 'Z_TONRY', 'SURVEY_CODE'] # create a list of the column names that you want to drop
# Drop these columns from the DataFrame and re-assign the result to `gama`.
# errors='ignore' skips names that are no longer present, so re-running this
# cell after the columns are already gone does not raise a KeyError.
gama = gama.drop(columns=drop_col, errors='ignore')

In [16]:
gama # display the DataFrame again: 'CATAID.1', 'Z_TONRY' and 'SURVEY_CODE' should no longer appear

Unnamed: 0,CATAID,RA,DEC,Z,nQ,logmstar,dellogmstar
0,6802,174.00600,0.72093,0.05054,4,9.076030,0.131666
1,6806,174.02279,0.70594,0.33124,4,10.979200,0.099166
2,6808,174.10071,0.65891,0.22936,4,10.791600,0.098116
3,6810,174.10908,0.80390,0.32630,4,11.119700,0.096136
4,6816,174.01896,0.66580,0.07641,4,9.012390,0.146456
...,...,...,...,...,...,...,...
113153,4321789,219.54175,1.20398,0.29884,4,10.410600,0.131773
113154,4322179,220.18208,1.03991,0.22929,3,9.689020,0.164714
113155,4327264,215.73183,1.66110,0.00006,4,0.464692,0.168939
113156,4327642,222.77558,1.53559,0.00034,4,1.253310,0.103429


In [17]:
# To rename a column, map the old name to the new name:
# nQ = Normalised redshift quality (use nQ > 2 for science)
gama.rename({'nQ': 'normalized redshift quality'}, axis='columns', inplace=True)

In [18]:
gama # display the DataFrame to check that 'nQ' now appears under its new name

Unnamed: 0,CATAID,RA,DEC,Z,normalized redshift quality,logmstar,dellogmstar
0,6802,174.00600,0.72093,0.05054,4,9.076030,0.131666
1,6806,174.02279,0.70594,0.33124,4,10.979200,0.099166
2,6808,174.10071,0.65891,0.22936,4,10.791600,0.098116
3,6810,174.10908,0.80390,0.32630,4,11.119700,0.096136
4,6816,174.01896,0.66580,0.07641,4,9.012390,0.146456
...,...,...,...,...,...,...,...
113153,4321789,219.54175,1.20398,0.29884,4,10.410600,0.131773
113154,4322179,220.18208,1.03991,0.22929,3,9.689020,0.164714
113155,4327264,215.73183,1.66110,0.00006,4,0.464692,0.168939
113156,4327642,222.77558,1.53559,0.00034,4,1.253310,0.103429


### Filtering Data

In [22]:
# To select a single column of the GAMA DataFrame, index it with the column name:
gama['RA']

0         174.00600
1         174.02279
2         174.10071
3         174.10908
4         174.01896
            ...    
113153    219.54175
113154    220.18208
113155    215.73183
113156    222.77558
113157    180.76758
Name: RA, Length: 113158, dtype: float64

In [28]:
# Note that this returns a Pandas Series (like a column vector) rather than a Pandas DataFrame;
# type() shows the class of the object returned by the single-column selection.
type(gama['RA'])

pandas.core.series.Series

In [29]:
# To trim the catalog down to several columns at once:
desired_columns = ['CATAID', 'RA', 'DEC', 'Z']  # list of the column names to keep
gama.loc[:, desired_columns]  # ':' keeps every row; the list picks the columns

Unnamed: 0,CATAID,RA,DEC,Z
0,6802,174.00600,0.72093,0.05054
1,6806,174.02279,0.70594,0.33124
2,6808,174.10071,0.65891,0.22936
3,6810,174.10908,0.80390,0.32630
4,6816,174.01896,0.66580,0.07641
...,...,...,...,...
113153,4321789,219.54175,1.20398,0.29884
113154,4322179,220.18208,1.03991,0.22929
113155,4327264,215.73183,1.66110,0.00006
113156,4327642,222.77558,1.53559,0.00034


In [31]:
# Note that this is NOT like dropping columns: the .loc selection was only displayed,
# not assigned to a variable, so `gama` itself is unchanged.
gama # printing the DataFrame shows the old (unsliced) version

Unnamed: 0,CATAID,RA,DEC,Z,normalized redshift quality,logmstar,dellogmstar
0,6802,174.00600,0.72093,0.05054,4,9.076030,0.131666
1,6806,174.02279,0.70594,0.33124,4,10.979200,0.099166
2,6808,174.10071,0.65891,0.22936,4,10.791600,0.098116
3,6810,174.10908,0.80390,0.32630,4,11.119700,0.096136
4,6816,174.01896,0.66580,0.07641,4,9.012390,0.146456
...,...,...,...,...,...,...,...
113153,4321789,219.54175,1.20398,0.29884,4,10.410600,0.131773
113154,4322179,220.18208,1.03991,0.22929,3,9.689020,0.164714
113155,4327264,215.73183,1.66110,0.00006,4,0.464692,0.168939
113156,4327642,222.77558,1.53559,0.00034,4,1.253310,0.103429


In [32]:
# To save this sliced version, you need to assign the sliced DataFrame to a variable.
# One could have used "desired_columns" instead, but you can also just type the column names directly here.
# The names are passed as a list: that is the documented way to select several labels
# with .loc, whereas pandas uses tuples for MultiIndex keys.
sliced_gama = gama.loc[:, ['CATAID', 'RA', 'DEC', 'Z']]
sliced_gama

Unnamed: 0,CATAID,RA,DEC,Z
0,6802,174.00600,0.72093,0.05054
1,6806,174.02279,0.70594,0.33124
2,6808,174.10071,0.65891,0.22936
3,6810,174.10908,0.80390,0.32630
4,6816,174.01896,0.66580,0.07641
...,...,...,...,...
113153,4321789,219.54175,1.20398,0.29884
113154,4322179,220.18208,1.03991,0.22929
113155,4327264,215.73183,1.66110,0.00006
113156,4327642,222.77558,1.53559,0.00034


In [33]:
# Function to slice GAMA dataframe by column names: 
def GAMA_slice(gama, desired_columns):
    '''
    Slices the GAMA DataFrame down to a desired set of column names (all rows are kept).
    WIP: want to add iloc (positional) functionality.

    Inputs:
    gama (PD DataFrame): Current working GAMA data catalog as a Pandas DataFrame.
    desired_columns (list): List (or tuple / other iterable) of column names you want
        to keep, in the order they should appear. A single column name given as a
        string is treated as a one-element list.
    Returns: Pandas DataFrame containing only the requested columns.
    Raises: KeyError (from pandas) if a requested column is not in the DataFrame.
    '''
    # Normalise the selector to a plain list. A bare string would otherwise make
    # .loc return a Series instead of the documented DataFrame, and a tuple is
    # how pandas spells a MultiIndex key rather than "several columns".
    if isinstance(desired_columns, str):
        columns = [desired_columns]
    else:
        columns = list(desired_columns)
    return gama.loc[:, columns]

# Example: keep only the 'CATAID' and 'logmstar' columns. The names are passed as a
# list, matching the type documented for `desired_columns`.
stellar_masses = GAMA_slice(gama, desired_columns=['CATAID', 'logmstar'])
stellar_masses # can now use this function whenever one wants to slice by columns.

Unnamed: 0,CATAID,logmstar
0,6802,9.076030
1,6806,10.979200
2,6808,10.791600
3,6810,11.119700
4,6816,9.012390
...,...,...
113153,4321789,10.410600
113154,4322179,9.689020
113155,4327264,0.464692
113156,4327642,1.253310
