In [1]:
# Version 1.02
# 28 Jan 2021
# EJ
#
# Changelog 1.02 - 25-Feb-2020
# Organized file directories
# Printing to .csv now only prints the existing servers and mountpoints (264 as of this writing)
# Printing to .csv now also removes the index column by adding 'index = False'

In [2]:
# Files needed
# envFS.csv - DPA Environment Google SpreadSheet for FileSystem
# prodFS.csv - PROD FS data from Splunk
# --------------------------------------------------------------------------------
# Helpful commands
#
# print(prodFS_df.loc[[x]]) # printing row x
# envFS_df.tail(10)
# prodFS_df = prodFS_df.sort_values(by=['Column_name'], ascending=True) # to sort by column
# envFS_df.columns # for reference to check which column to join from DPAenv
# <variable/data container>.to_csv('filename') # for exporting
# --------------------------------------------------------------------------------
# LEGEND:
# FS - File System
# df - DataFrame (pandas table loaded from a data file: .csv, .xlsx, etc.)
# env - environment (official .csv Environment file from google sheets)
# --------------------------------------------------------------------------------
# Links:
# DPA Environment - https://docs.google.com/spreadsheets/d/1Ll7-mdb8tsGUKIDYJ-dMEBmydxXf24krk8J7r1RIUog/edit#gid=588246582
# Splunk (DPA PROD/Staging 2) - http://10.69.81.41:8000/en-US/app/splunk_app_for_linux_Infrastructure/dashboards

In [1]:
import pandas as pd
# Notebook display settings: allow up to 500 columns and a 100-character output width
pd.options.display.max_columns = 500
pd.options.display.width = 100

In [2]:
# Load the two weekly input files into pandas DataFrames (paths are relative to the current working directory)
envFS_df = pd.read_csv('./sources/envFS.csv') # DPA Environment sheet export per the "Files needed" notes (NOTE(review): this previously said "downloaded from Splunk" - confirm the source)
prodFS_df = pd.read_csv('./sources/prodFS.csv') # PROD file-system data, downloaded from Splunk weekly

In [11]:
# Drop rows with missing values (rows are removed, nothing is filled in):
#   - envFS_df keeps only rows that have a 'Mount' value
#   - prodFS_df keeps only rows that have a 'Used' value
# .notna() selects exactly the rows that `.isna() == False` selected.
envFS_df = envFS_df[envFS_df['Mount'].notna()]
prodFS_df = prodFS_df[prodFS_df['Used'].notna()]

In [13]:
#envFS_df.head(10)
#prodFS_df.head(10)

Unnamed: 0,Environment,Host,Mount,WE 01.05,WE 01.12,WE 01.19,WE 01.26,WE 02.02,WE 02.14,WE 02.23,WE 03.01,WE 03.08,WE 03.15,WE 03.22,WE 03.29,WE 04.05,WE 04.12,WE 04.19,WE 04.26,WE 05.03,WE 05.10,WE 05.17,WE 05.24,WE 05.31,WE 06.07,WE 06.14,WE 06.21,WE 06.28,WE 07.05,WE 07.12,WE 07.19,WE 07.26,WE 08.02,WE 08.09,WE 08.16,WE 08.23,WE 08.31,WE 09.06,WE 09.13,WE 09.20,WE 09.27,WE 10.04,WE 10.11,WE 10.18,WE 10.25,WE 11.03,WE 11.08,WE 11.15,WE 11.22,WE 11.30,WE 12.09,WE 12.13,WE 12.20,WE 12.28,WE 01.04,WE 01.10,WE 01.17,WE 01.24,WE 01.28
0,PROD2,dpaanalytics,/,12.00%,,12.00%,13.00%,12.00%,13,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,12,13,12,13,13,13,13,13,13,13,13,14,15,15,15,15,15,15,15,16,15,16
1,PROD2,dpaanalytics,/appl/depot,1.00%,,1.00%,1.00%,1.00%,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
2,PROD2,dpaanalytics,/appl/di_shareddata,1.00%,,1.00%,1.00%,1.00%,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
3,PROD2,dpaanalytics,/appl/sasbackup,1.00%,,,1.00%,1.00%,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4,PROD2,dpaanalytics,/appl/sasdata,1.00%,,,1.00%,1.00%,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
5,PROD2,dpaanalytics,/appl/saslogs,1.00%,,1.00%,1.00%,1.00%,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
6,PROD2,dpaanalytics,/dev,0.00%,,,0.00%,0.00%,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,PROD2,dpaanalytics,/sas,34.00%,,73.00%,72.00%,59.00%,31,44,44,76,26,26,65,63,78,58,73,35,62,62,58,48,51,55,74,76,73,58,43,63,63,68,53,53,51,44,44,65,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55
8,PROD2,dpaanalytics,/work,1.00%,,9.00%,1.00%,1.00%,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
9,PROD2,dpadesignmetadata,/,15.00%,,,15.00%,15.00%,16,16,15,16,16,16,16,16,17,17,17,18,17,17,18,18,18,18,18,19,19,19,20,20,20,22,22,23,23,24,25,24,25,26,26,26,27,27,27,28,31,30,31,31,33,33,33,34,34,34,35


In [6]:
# Inner-join the environment frame with the PROD frame on the shared 'Host' and 'Mount'
# columns, so only (Host, Mount) pairs present in both frames are kept
mergeData = envFS_df.merge(prodFS_df, on=['Host', 'Mount'], how='inner')
# Preview ordered by host, then mount; as the cell's last expression it is what gets displayed
# (comment the line out to hide the preview)
mergeData.loc[:, ['Host', 'Mount', 'Used']].sort_values(by=['Host', 'Mount'])

Unnamed: 0,Host,Mount,Used
0,dpaanalytics,/,15.0
1,dpaanalytics,/appl/depot,1.0
2,dpaanalytics,/appl/di_shareddata,1.0
3,dpaanalytics,/appl/sasbackup,1.0
4,dpaanalytics,/appl/sasdata,1.0
...,...,...,...
259,ip-10-237-82-146.ap-southeast-1.compute.internal,/appl/sasbackup,1.0
260,ip-10-237-82-146.ap-southeast-1.compute.internal,/appl/sashome,4.0
261,ip-10-237-82-146.ap-southeast-1.compute.internal,/appl/saslogs,1.0
262,ip-10-237-82-146.ap-southeast-1.compute.internal,/dev,0.0


In [7]:
# Write the weekly report: one row per (Host, Mount) with its 'Used' value, sorted by host then mount.
# The whole frame is exported instead of a hard-coded head(264): mergeData comes from an
# inner join, so it already holds only matching servers/mountpoints, and a fixed row count
# would silently drop the last rows of the sorted report once more mountpoints appear.
mergeDataPrint = mergeData[['Host', 'Mount', 'Used']].sort_values(['Host', 'Mount'])
mergeDataPrint.to_csv('./weekly_output/prod_FS_Weekly_Output.csv', index=False)  # index=False leaves the index column out of the file
print(mergeDataPrint)

                                                 Host                Mount  Used
0                                        dpaanalytics                    /  15.0
1                                        dpaanalytics          /appl/depot   1.0
2                                        dpaanalytics  /appl/di_shareddata   1.0
3                                        dpaanalytics      /appl/sasbackup   1.0
4                                        dpaanalytics        /appl/sasdata   1.0
..                                                ...                  ...   ...
259  ip-10-237-82-146.ap-southeast-1.compute.internal      /appl/sasbackup   1.0
260  ip-10-237-82-146.ap-southeast-1.compute.internal        /appl/sashome   4.0
261  ip-10-237-82-146.ap-southeast-1.compute.internal        /appl/saslogs   1.0
262  ip-10-237-82-146.ap-southeast-1.compute.internal                 /dev   0.0
263  ip-10-237-82-146.ap-southeast-1.compute.internal                /work  46.0

[264 rows x 3 columns]
