In [1]:
# import necessary modules
import calendar
import os
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
def get_data(month, year):
    """Scrape the daily Agmarknet onion price tables for one month.

    One request per calendar day of the month is sent to the Agmarknet
    search page (commodity 23 / Onion, state UP, as encoded in the URL
    below) and the first HTML table of each response is collected.

    Args:
        month: three-letter month abbreviation with a leading capital,
               e.g. 'Jan' or 'Feb'
        year:  string containing the year, e.g. '2002' (an int works too)
    Return:
        month_df: dataframe with the rows of every day that yielded a
                  table, in date order; empty if no day yielded one
    Raises:
        ValueError: if month is not one of the twelve abbreviations
    """
    # number of days per month in a non-leap year
    days_in_month = {
        'Jan': 31, 'Feb': 28, 'Mar': 31, 'Apr': 30, 'May': 31, 'Jun': 30,
        'Jul': 31, 'Aug': 31, 'Sep': 30, 'Oct': 31, 'Nov': 30, 'Dec': 31,
    }
    # reject unknown months up front instead of silently treating them as
    # a 28-day month and requesting dates the site cannot understand
    if month not in days_in_month:
        raise ValueError(
            "month must be one of {}, got {!r}".format(sorted(days_in_month), month)
        )

    last_date = days_in_month[month]
    # full Gregorian rule: century years are leap only when divisible by 400
    if month == 'Feb' and calendar.isleap(int(year)):
        last_date = 29

    # the same date is used for both ends of the range, so each request
    # returns exactly one day
    url_template = 'https://agmarknet.gov.in/SearchCmmMkt.aspx?Tx_Commodity=23&Tx_State=UP&Tx_District=0&Tx_Market=0&DateFrom={}&DateTo={}&Fr_Date={}&To_Date={}&Tx_Trend=2&Tx_CommodityHead=Onion&Tx_StateHead=Uttar+Pradesh&Tx_DistrictHead=--Select--&Tx_MarketHead=--Select--'

    # collect the per-day frames and concatenate once at the end; growing a
    # dataframe inside the loop copies all previous rows on every iteration
    daily_frames = []

    # iterate over each day in the month
    for day in range(1, last_date + 1):
        # construct the date as DD-Mon-YYYY with a zero-padded day
        date = '{:02d}-{}-{}'.format(day, month, year)
        url = url_template.format(date, date, date, date)

        # a timeout keeps one stalled connection from hanging the whole scrape;
        # network errors still propagate to the caller as before
        r = requests.get(url, timeout=60)

        # create soup object
        soup = BeautifulSoup(r.text, 'lxml')
        try:
            # read_html expects a file-like object; literal HTML strings
            # are deprecated in recent pandas versions
            tables = pd.read_html(StringIO(str(soup.find_all('table'))))
            # keep the first table without its last two rows
            # NOTE(review): presumably footer/summary rows - confirm against the page
            daily_frames.append(tables[0][:-2])
        except Exception:
            # best effort: a day without a parsable table is reported and
            # skipped; unlike a bare except this lets Ctrl-C through
            print('No data available for {}'.format(date))

    print('Data scrapping done for month {} and year {}'.format(month, year))

    if not daily_frames:
        return pd.DataFrame()
    return pd.concat(daily_frames, axis=0)

In [None]:
# month abbreviations in calendar order, in the form get_data expects
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# write the monthly files into the folder that the 2002 consolidation cell
# reads them back from, creating it on a fresh run
output_dir = 'Onion_Data'
os.makedirs(output_dir, exist_ok=True)

# iterate over the years 2002..2020
for year in range(2002, 2021):
    # iterate over the months
    for month in months:
        # get the dataframe
        df = get_data(month, str(year))
        # a month without any data would be saved as a CSV with no columns,
        # which is of no use when the files are read back, so skip it
        if df.empty:
            print('Nothing to save for {} {}'.format(month, year))
            continue
        # construct the file name as <Mon>_<two-digit year>.csv
        file_name = month + '_' + str(year)[-2:] + '.csv'
        # save the data as a csv file
        df.to_csv(os.path.join(output_dir, file_name), index=False)

In [83]:
# combine every monthly file whose name (without extension) ends in '02'
# into a single frame for the year 2002
monthly_frames = []
# os.listdir returns names in arbitrary order; sorting makes the row order
# of the combined frame reproducible
for file in sorted(os.listdir('Onion_Data')):
    if file.split('.')[0][-2:] == '02':
        print(file)
        month_df = pd.read_csv(os.path.join('Onion_Data', file))
        monthly_frames.append(month_df)

# concatenate once instead of re-copying the growing frame per file;
# fall back to an empty frame when no file matched
df = pd.concat(monthly_frames, axis=0) if monthly_frames else pd.DataFrame()

Aug_02.csv
Jul_02.csv
Jun_02.csv
Nov_02.csv
Oct_02.csv


In [84]:
# write the combined 2002 frame to data_2002.csv in the working directory, leaving out the index column
df.to_csv('data_2002.csv', index = False)

In [85]:
# preview the first rows of the combined frame
df.head()

Unnamed: 0,State Name,District Name,Market Name,Variety,Group,Arrivals (Tonnes),Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Reported Date
0,Uttar Pradesh,Varanasi,Varanasi(Grain),Nasik,Vegetables,3700.0,460,510,480,02 Aug 2002
1,Uttar Pradesh,Varanasi,Varanasi(Grain),Nasik,Vegetables,3000.0,465,510,490,03 Aug 2002
2,Uttar Pradesh,Varanasi,Varanasi(Grain),Nasik,Vegetables,2200.0,475,510,500,05 Aug 2002
3,Uttar Pradesh,Varanasi,Varanasi(Grain),Nasik,Vegetables,2500.0,490,520,510,07 Aug 2002
4,Uttar Pradesh,Ghaziabad,Ghaziabad,Red,Vegetables,10.0,380,450,420,08 Aug 2002


In [86]:
# (rows, columns) of the combined frame
df.shape

(386, 10)

In [None]:
import requests # to get image from the web
import shutil # to save it locally

## Set up the image URL and the local filename (last path segment of the URL)
image_url = "https://cdn.pixabay.com/photo/2020/02/06/09/39/summer-4823612_960_720.jpg"
filename = image_url.split("/")[-1]

# Request the image with stream=True so the body is not loaded into memory
# up front. The with-block releases the connection once the copy is done, and
# the timeout keeps a stalled server from blocking the cell indefinitely.
with requests.get(image_url, stream = True, timeout = 30) as r:
    # Check if the image was retrieved successfully
    if r.status_code == 200:
        # Have the raw stream undo any gzip/deflate transfer encoding, so the
        # bytes written to disk are the image itself.
        r.raw.decode_content = True

        # Copy the stream into a local file opened in binary write mode.
        with open(filename,'wb') as f:
            shutil.copyfileobj(r.raw, f)

        print('Image sucessfully Downloaded: ',filename)
    else:
        print('Image Couldn\'t be retreived')


In [None]:
from bs4 import BeautifulSoup
import requests
import re
import urllib.parse
import urllib.request  # Python 3 replacement for urllib2
import os
import http.cookiejar as cookielib  # Python 3 name of the cookielib module
import json

def get_soup(url,header):
    """Fetch url with the given request headers and parse the response.

    Args:
        url:    address of the page to download
        header: dict of HTTP request headers sent with the request
    Return:
        BeautifulSoup object built with the 'html.parser' parser
    """
    request = urllib.request.Request(url, headers=header)
    # the response is read completely while the soup is built, so it can be
    # closed as soon as parsing returns
    with urllib.request.urlopen(request) as response:
        return BeautifulSoup(response, 'html.parser')


# ask for the search terms; you can change the query for the image here
words = input("query image").split()
if not words:
    # an empty query has nothing to search for and no folder name to use
    raise ValueError("the image query must contain at least one word")
image_type="ActiOn"  # prefix of every saved file name
query='+'.join(words)
# percent-encode the terms so special or non-ASCII characters give a valid URL
url="https://www.google.co.in/search?q="+urllib.parse.quote_plus(' '.join(words))+"&source=lnms&tbm=isch"
print(url)
#add the directory for your image here
DIR="Pictures"
header={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
}
soup = get_soup(url,header)


ActualImages=[]# contains the link for Large original images, type of  image
for a in soup.find_all("div",{"class":"rg_meta"}):
    # each rg_meta div holds a JSON document; decode it once and take the
    # "ou" (link) and "ity" (type) entries
    meta = json.loads(a.text)
    ActualImages.append((meta["ou"], meta["ity"]))

print("there are total", len(ActualImages), "images")

# images go to Pictures/<query>; makedirs creates both levels when missing
DIR = os.path.join(DIR, query)
os.makedirs(DIR, exist_ok=True)

###print images
for img, Type in ActualImages:
    try:
        # send the same headers as the search request (the header dict itself,
        # not the dict nested as the User-Agent value)
        req = urllib.request.Request(img, headers=header)
        with urllib.request.urlopen(req) as response:
            raw_img = response.read()

        # number the file after the images of this type already in the folder
        cntr = len([name for name in os.listdir(DIR) if image_type in name]) + 1
        print(cntr)
        # fall back to .jpg when the metadata carries no image type
        extension = Type if Type else "jpg"
        target = os.path.join(DIR, image_type + "_" + str(cntr) + "." + extension)

        # the with-block closes the file even if the write fails
        with open(target, 'wb') as f:
            f.write(raw_img)
    except Exception as e:
        # best effort: report the failed image and continue with the next one
        print("could not load : "+img)
        print(e)