From 622769a0ed0beda916dc886d12f4e8070d9d06ca Mon Sep 17 00:00:00 2001
From: unknown <sappal@SAPPAL-W7-4.corp.adobe.com>
Date: Mon, 2 Feb 2015 21:42:32 +0530
Subject: [PATCH] Adding some more utility python script

---
 Counter.py                        | 18 ++++++++++++++
 README                            | 22 ++++++++++++++++-
 compare_files.py                  | 40 +++++++++++++++++++++++++++++++
 count_all_mp3_files_on_machine.py | 24 +++++++++++++++++++
 fetch_email_address.py            | 23 ++++++++++++++++++
 fetch_number_of_lines.py          | 21 ++++++++++++++++
 fetch_url_in_a_web_page.py        | 17 +++++++++++++
 list_all_specific_type_files.py   | 22 +++++++++++++++++
 read_textfile.py                  | 24 +++++++++++++++++++
 search_text_file_for_string.py    | 17 +++++++++++++
 simple_downloader.py              | 26 ++++++++++++++++++++
 word_frequency_in_python.py       | 23 ++++++++++++++++++
 12 files changed, 276 insertions(+), 1 deletion(-)
 create mode 100644 Counter.py
 create mode 100644 compare_files.py
 create mode 100644 count_all_mp3_files_on_machine.py
 create mode 100644 fetch_email_address.py
 create mode 100644 fetch_number_of_lines.py
 create mode 100644 fetch_url_in_a_web_page.py
 create mode 100644 list_all_specific_type_files.py
 create mode 100644 read_textfile.py
 create mode 100644 search_text_file_for_string.py
 create mode 100644 simple_downloader.py
 create mode 100644 word_frequency_in_python.py

diff --git a/Counter.py b/Counter.py
new file mode 100644
index 00000000000..6546badbdc9
--- /dev/null
+++ b/Counter.py
@@ -0,0 +1,18 @@
+__author__ = 'tusharsappal'
+## This script uses a Python dictionary to count how many times each character occurs in a string
+def Counter(str):
+    d=dict()
+    for c in str:
+        if c not in d:
+            d[c]=1
+        else :
+            d[c]=d[c]+1
+
+    return  d
+
+
+## Replace the function argument below with the string whose characters you want to count
+
+
+temp=Counter("Replace with the string for which you want to check ")
+print temp
\ No newline at end of file
diff --git a/README b/README
index c067f4eb046..ce8e4a6bfe1 100644
--- a/README
+++ b/README
@@ -34,4 +34,24 @@ script_count.py - This scans my scripts directory and gives a count of the diffe
 
 script_listing.py - This will list all the files in the given directory, it will also go through all the subdirectories as well
 
-testlines.py - This very simple script open a file and prints out 100 lines of whatever is set for the line variable
\ No newline at end of file
+testlines.py - This very simple script open a file and prints out 100 lines of whatever is set for the line variable
+
+compare_files.py - This script compares two text files
+
+count_all_mp3_files_on_machine.py - This script aims to provide a count of mp3 files present in a given directory 
+
+fetch_email_address.py - This script fetches the email address from the string provided 
+
+fetch_number_of_lines.py - This script fetches the number of lines from the text file provided
+
+fetch_url_in_a_web_page.py - This script fetches the URL(s) present in a web page
+
+list_all_specific_type_files.py - This script lists all the files of a specific type present in a directory
+
+read_textfile.py - This script reads the text file provided as an argument
+
+search_text_file_for_string.py - This script aims to search for a string present in a text file 
+
+simple_downloader.py - This script aims to simulate a simple downloader
+
+word_frequency_in_python.py - This script prints the word frequency in the text provided as an argument
diff --git a/compare_files.py b/compare_files.py
new file mode 100644
index 00000000000..1e09e0af824
--- /dev/null
+++ b/compare_files.py
@@ -0,0 +1,40 @@
+## This script compares two text files.
+## The first line of defence is to check that both files have the same number of lines.
+## The second line of defence is to read each file's lines into a list and compare the two lists.
+__author__ = 'tusharsappal'
+
+def compare_two_files(str_1,str_2):
+    print "Comparing starts"
+    print"First fetching the number of line"
+    with open(str_1) as fin :
+        lines = sum (1 for line in fin)
+    with open(str_2) as fin_2:
+        lines_2 = sum(1 for line in fin_2)
+
+    if(lines== lines_2):
+        print "First Line of defence is passed"
+        ins = open (str_1,"r")
+        array = []
+        for line_1 in ins:
+            array.append(line_1)
+        ins.close()
+        ins_2 = open (str_2,"r")
+        array_2=[]
+        for line_2 in ins_2:
+            array_2.append(line_2)
+        ins_2.close()
+        if (array==array_2):
+            print "In the Second Line of Defence the files have passed"
+        else :
+            print "In the second line of defence the files have failed"
+        
+        
+    else :
+        print "First line of defence is not passed"
+
+
+
+## Replace the first parameter with the first file path and the second parameter with the second file path
+compare_two_files("path to the first file ","path to the second file")
+        
+    
diff --git a/count_all_mp3_files_on_machine.py b/count_all_mp3_files_on_machine.py
new file mode 100644
index 00000000000..6f8d00fc6df
--- /dev/null
+++ b/count_all_mp3_files_on_machine.py
@@ -0,0 +1,24 @@
+__author__ = 'tusharsappal'
+
+
+import os
+
+## This script counts the mp3 files under the listed directory and prints each file's path and size (to count another file type, change the extension passed to endswith)
+
+def count_all_mp3_files_on_machine():
+    count = 0
+    for (dirname,dirs,files) in os.walk('Replace the directory path where to serach like C:/ or / on UNIX like machines'):
+        for filename in files:
+            if filename.endswith(".mp3"):
+                count =count+1
+                thefile = os.path.join(dirname,filename)
+                print "The Name is ",thefile, "and the size is ", os.path.getsize(thefile)
+
+
+
+
+
+    print "The total number of mp3 files on the system are ",count
+
+
+count_all_mp3_files_on_machine()
\ No newline at end of file
diff --git a/fetch_email_address.py b/fetch_email_address.py
new file mode 100644
index 00000000000..491826148e2
--- /dev/null
+++ b/fetch_email_address.py
@@ -0,0 +1,23 @@
+
+__author__ = 'tusharsappal'
+
+
+## This script fetches the email addresses from the string provided; it only fetches addresses of the format alphanumeric@alphabets
+
+import re
+def fetch_email_address(str):
+    fetcher=re.findall('[a-zA-Z0-9]\S+@\S+[a-zA-Z]]',str)
+    if len(fetcher)>0:
+        print fetcher
+
+
+
+## Replace the function argument with the string to be parsed, or modify the script to read the data from a text file
+
+
+fetch_email_address("Replace this argument with the string to be parsed")
+
+
+
+
+
diff --git a/fetch_number_of_lines.py b/fetch_number_of_lines.py
new file mode 100644
index 00000000000..f61a7ff7ddb
--- /dev/null
+++ b/fetch_number_of_lines.py
@@ -0,0 +1,21 @@
+## This script fetches the number of lines in a text file
+
+__author__ = 'tusharsappal'
+def fetch_number_of_lines(str):
+    print "Fetching starts from this point"
+    with open(str) as fin:
+        lines = sum (1 for line in fin)
+        print "Number of lines is "
+        print lines
+
+
+
+
+
+## Replace the argument in the function call with the path of the file
+
+fetch_number_of_lines("The path to the text file")
+
+        
+    
+
diff --git a/fetch_url_in_a_web_page.py b/fetch_url_in_a_web_page.py
new file mode 100644
index 00000000000..c52562271e8
--- /dev/null
+++ b/fetch_url_in_a_web_page.py
@@ -0,0 +1,17 @@
+import urllib
+import re
+__author__ = 'tusharsappal'
+
+## Enter a URL and this small snippet will print all the http:// hyperlinks (href attributes) present in that web page
+def find_hyper_links_in_page():
+    url = raw_input("Enter the url to be searched--")
+    html =urllib.urlopen(url).read()
+    links =re.findall('href="(http://.*?)"', html)
+    for link in links:
+        print link
+
+
+
+
+
+find_hyper_links_in_page()
diff --git a/list_all_specific_type_files.py b/list_all_specific_type_files.py
new file mode 100644
index 00000000000..91844ebb63a
--- /dev/null
+++ b/list_all_specific_type_files.py
@@ -0,0 +1,22 @@
+__author__ = 'tusharsappal'
+import os
+
+## This program searches in the current working directory and lists all the python files
+
+
+
+def print_the_list_of_python_files():
+    cwd = os.getcwd()
+    print "The current working directory is ", cwd
+    cwd_2 = os.listdir(cwd)
+    for file in cwd_2:
+        if file.endswith(".py"):   ## To change the type of the file fetched change the ends with to the specific file name
+
+            print "The pyhton file is ", file
+
+
+
+
+
+print_the_list_of_python_files()
+
diff --git a/read_textfile.py b/read_textfile.py
new file mode 100644
index 00000000000..5abf3f8254f
--- /dev/null
+++ b/read_textfile.py
@@ -0,0 +1,24 @@
+# declaring a global array_store to store the data temporarily 
+__author__ = 'tusharsappal'
+array_store=[]
+def readFile(str):
+    print "reading starts from this part"
+    f=open(str,"r")
+    global array_store
+    
+    for line in f:
+        line=line.rstrip('\n')
+        array_store.append(line)
+        
+    for l in array_store:
+        print l
+    
+    return ;
+
+
+# Enter the file path inside the readFile call below
+readFile("Enter the text file path separated  by / slashes ")
+
+
+
+
diff --git a/search_text_file_for_string.py b/search_text_file_for_string.py
new file mode 100644
index 00000000000..80ea5bdf128
--- /dev/null
+++ b/search_text_file_for_string.py
@@ -0,0 +1,17 @@
+## This script searches a particular string in the text file
+__author__ = 'tusharsappal'
+def read_file(str,string_to_be_searched):
+    print "The reading starts here"
+    with open(str,"r") as input_data:
+        for line in input_data:
+            if line.find(string_to_be_searched)>-1 :
+                print "String found"
+                break
+
+
+
+## Replace the first argument with the file path (separated with forward slashes) and the second argument with the string to be searched
+read_file("path_to_the_text_file","string_to_be_searched")
+            
+    
+       
diff --git a/simple_downloader.py b/simple_downloader.py
new file mode 100644
index 00000000000..1b5e0a904b1
--- /dev/null
+++ b/simple_downloader.py
@@ -0,0 +1,26 @@
+__author__ = 'tusharsappal'
+
+import urllib
+def simple_downloader(url,path_to_store_content):
+    img = urllib.urlopen(url)
+    fhand=open(path_to_store_content,'w')
+    size=0
+    while True:
+        info=img.read(10000)
+        if len(info)<1:
+            break
+        else :
+            size=size+len(info)
+        fhand.write(info)
+
+
+    fhand.close()
+
+
+
+## Replace the first function parameter with the URL from which the content is to be fetched,
+## and the second parameter with the path on your local system where the fetched content is to be stored
+
+simple_downloader("Replace with the URL","Replace with the path on your local system where we want to store the content")
+
+
diff --git a/word_frequency_in_python.py b/word_frequency_in_python.py
new file mode 100644
index 00000000000..38727a857bb
--- /dev/null
+++ b/word_frequency_in_python.py
@@ -0,0 +1,23 @@
+__author__ = 'tusharsappal'
+## This script opens the file whose name the user enters and finds the frequency of each word in that text file
+def word_frequency_counter():
+    fname=raw_input("Enter the Name of the file : ")
+    try:
+        fhand = open(fname)
+    except:
+        print("The file cannot  be opened",file)
+        exit()
+
+    counts= dict()
+    for line in fhand:
+        words = line.split()
+        for word in words:
+            if word not in counts:
+                counts[word]=1
+            else:
+                counts[word]=counts[word]+1
+
+
+    print counts
+
+word_frequency_counter()
\ No newline at end of file