This notebook shows a simple example of how to trim data / make cuts based on certain chosen parameters. 

In [None]:
# Import relevant libraries
import pandas as pd 
import numpy as np 

In [None]:
# The data we want to use is in the GAMA_Galaxies directory; the file is called 'GAMA_sample1.csv'
!ls ../GAMA_Galaxies 

In [None]:
# Import data
# The path is relative: '..' refers to the parent of the current working directory,
# and everything after the slash descends from there into GAMA_Galaxies.
path = '../GAMA_Galaxies/GAMA_sample1.csv'
gama = pd.read_csv(path) 
gama # display the freshly loaded DataFrame

In [None]:
# To drop columns (i.e. remove them from your working DataFrame):
drop_col = ['CATAID.1', 'Z_TONRY', 'SURVEY_CODE'] # list of the column names that you want to drop
# errors='ignore' skips names that are already gone, so this cell can be re-run
# without raising a KeyError on the second execution.
gama = gama.drop(columns=drop_col, errors='ignore') # returns a new DataFrame without these columns

In [None]:
gama # columns should be removed

In [None]:
# To rename a column (rename returns a new DataFrame, so assign it back rather than using inplace=True):
gama = gama.rename(columns={'nQ': 'normalized redshift quality'}) # nQ = Normalised redshift quality (use nQ > 2 for science)

In [None]:
gama

### Filtering Data by Column

In [None]:
# To cut the GAMA DataFrame by one column: 
gama['RA']

In [None]:
# Note that this returns a Pandas Series (like a column vector) rather than a Pandas DataFrame
type(gama['RA'])

In [None]:
# To trim catalog by multiple columns: 
desired_columns = ['CATAID', 'RA', 'DEC', 'Z']
gama.loc[:, desired_columns]

In [None]:
# Note that this is NOT like dropping columns, this slices the DataFrame but does not save it
gama # printing the DataFrame shows the old (unsliced version)

In [None]:
# To save this sliced version, assign the sliced DataFrame to a variable.
# One could have used "desired_columns" instead, but you can also just type the column names directly here.
# The labels are given as a list: .loc reads a list as "select these columns",
# whereas a tuple is how a single MultiIndex key is written.
sliced_gama = gama.loc[:, ['CATAID', 'RA', 'DEC', 'Z']]
sliced_gama

In [None]:
# Function to slice GAMA dataframe by column names: 
def GAMA_slice(gama, desired_columns):
    '''
    Return a column subset of the GAMA catalog, selected by column label.

    WIP: positional (iloc) selection is planned but not implemented yet.

    Inputs:
    gama (PD DataFrame): The GAMA data catalog currently being worked on.
    desired_columns (list): Names of the columns to keep.

    Returns: Pandas DataFrame holding every row of `gama` and only the requested columns.
    '''
    every_row = slice(None)  # equivalent to writing ":" inside .loc
    column_subset = gama.loc[every_row, desired_columns]
    return column_subset

stellar_masses = GAMA_slice(gama, desired_columns=['CATAID', 'logmstar']) # pass the column names as a list, as the function's docstring specifies
stellar_masses # can now use this function whenever one wants to slice by columns. 

In [None]:
### come back and add iloc functionality / example

### Filtering Data by Row Values

In [None]:
gama

In [None]:
# To look at rows where a column is above/below a certain value: DATAFRAME[DATAFRAME['PARAMETER'] > VALUE]
gama[gama['logmstar'] > 10] # gives all GAMA rows (with every column) where logmstar is greater than 10 

In [None]:
# Different syntax gives the same thing 
gama[gama.logmstar > 10]

In [None]:
# Can also look for exact values. The element-wise comparison operators all work here: >  >=  <  <=  ==  !=
# To combine conditions use the element-wise operators & (and), | (or) and ~ (not), with each condition in parentheses;
# Python's plain `and` / `or` keywords do not work on a pandas Series (they raise a ValueError).
# Note: == on floats needs an exact match, so a value that differs only by rounding will not be selected.
gama[gama.logmstar == 10.9792] 

In [None]:
# To slice by multiple parameters and their values: 
gama[(gama.logmstar < 10) | (gama.Z > 0.3)]

In [None]:
# To actually drop the rows we sliced, keep everything that does NOT match the condition.
# Assigning the result back (instead of drop(..., inplace=True)) avoids mutating the DataFrame in place,
# and selecting with a boolean mask does not depend on the index labels being unique.
gama = gama[~((gama.logmstar < 10) | (gama.Z > 0.3))].copy() # .copy() makes the result an independent DataFrame

In [None]:
# Now the GAMA catalog has only galaxies with logmstar of at least 10 and Z of at most 0.3
gama 