# Parsing data files

NLTK already contains the required corpora. The simplest way is to reuse the NLTK reviews reader and the Review object:

In [205]:
import nltk
from nltk import word_tokenize
from nltk.corpus import product_reviews_1
from textblob import TextBlob # for noun phrase extraction
from apyori import apriori # For the Apriori algorithm
import string
import time

nltk.download('product_reviews_1')
camera_reviews = product_reviews_1.reviews('Canon_G3.txt')
r = camera_reviews[0]
r.review_lines[0].features

# review.features()
# print(len(camera_reviews))
#review.sents()

[nltk_data] Downloading package product_reviews_1 to
[nltk_data]     /Users/andrejwork/nltk_data...
[nltk_data]   Package product_reviews_1 is already up-to-date!


[('canon powershot g3', '+3')]

# Preprocessing

For the preprocessing

In [287]:
class PSent:
    def __init__(self, s, f):
        self.raw = s # Sentence in raw format
        self.pp = [] # Sentence after preprocessing
        self.ft = [] # Features found in the sentence
        self.op = [] # Opinion words
        self.pos = {} # Sentiment per feature positive
        self.neg = {} # Sentiment per feature negative
        self.test = f # Test features labelled by human
        self.eval = {} # Evaluation score
        
    def __repr__(self):
        return "PSent"
    
    def __str__(self):
        return ' '.join(self.raw)
        
class PRev:
    def __init__(self, r):
        self.NltkReview = r
        self.review = []        
        self.eval = {} # Evaluation score
        
    def preprocess(self, 
                         spelling = False,
                         stemming = False):
        
        es = nltk.stem.SnowballStemmer('english')
        
        self.review = []
        
        for rl in self.NltkReview.review_lines:
            
            ps = PSent(rl.sent, 
                       rl.features) # Human labelled features
            
            a = ' '.join(rl.sent)
            a = TextBlob(a)

            # 2. spelling correction 
            if(spelling):                    
                a = a.correct() 

            # 3. Chunking noun phrases
            a = a.noun_phrases

            # 4. Stemming the words in the noun phrases
            if(stemming):      
                a = [es.stem(np) for np in a]
            else:
                a = [np for np in a]
            
            ps.pp=a
            self.review.append(ps)
            
    def sentiment(self):
        # Detecting sentiment polarity in the sentences
        # containing product features        
        for s in self.review:               
            if s.ft:
                # print("\n"+str(s))
                p = TextBlob(str(s)).sentiment.polarity
                if( p > 0 ):
                    for f in s.ft:
                        # fkey = ' '.join(f)
                        s.pos.setdefault(f, 0)
                        s.pos[f] += 1
                elif( p < 0 ):
                    for f in s.ft:
                        for f in s.ft:
                            # fkey = ' '.join(f)
                            s.pos.setdefault(f, 0)
                            s.pos[f] -= 1
            
    def __str__(self):
        rstr = self.NltkReview.title
        for s in self.NltkReview.sents():
            rstr += ' '.join(s)+'\n'
        return rstr

    
class PReviews:
    
    def __init__(self, nltk_corpus):  
        self.name = nltk_corpus  
        self.NltkCorpus = product_reviews_1.reviews(nltk_corpus)
        self.eval = {} # Evaluation score
        self.report = {}
        self.test_report = {}
        
        self.revs = []
        for r in self.NltkCorpus:
            self.revs.append(PRev(r))
            
        print(len(self.revs))
        
    def preprocess(self, 
                         spelling = False,
                         stemming = False):
        
        start = time.time()

        for r in self.revs:
            r.preprocess(spelling = spelling,
                         stemming = stemming)
            
        end = time.time()

        print("Preprocessed {} reviews in {:.2f} seconds (spelling correction={}, stemming={})"
              .format(len(self.revs), end-start, spelling, stemming))
        
    def features(self,
                 min_support=0.005, 
                 min_confidence=0.2, 
                 min_lift=3, 
                 min_length=3):
        
        if not self.revs:
            print("Preprocessing step must be run first.")
            return
        
        l = []
        for r in self.revs:
            for s in r.review:
                if s.pp:
                    for np in s.pp:
                        ## Append lemmatized noun phrases
                        l.append(list(TextBlob(np).words.lemmatize()))
                        
        # print(l)
        # Applying Apriori algorithm to find the frequent sets:
        self.association_rules = apriori(l, 
                                    min_support=min_support, 
                                    min_confidence=min_confidence, 
                                    min_lift=min_lift, 
                                    min_length=min_length)
        
        # Find features in individual revies (per sentence)
        # Tag those sentences where features were found
        for r in self.revs:
            for s in r.review:
                # print("Checking review for features")
                for f in association_results:
                    feat = list(f.items)
                    # print("\n\nChecking ", f.items)
                    found = False
                    for word in feat:
                        # print("Check if {} in {}".format(e, s))
                        found = False
                        for np in s.pp:
                            if word in np:
                                # print("Found feature {} in {}".format(e, np))
                                found = True
                                break
                        if not found:
                            break                
                    if(found):                        
                        featstr = feat[1]+' '+feat[0]
                        # print("Found {} in {}".format(feat[::-1], s))
                        # s.ft.append(feat[::-1])
                        s.ft.append(featstr)
        
        return self.association_rules
    
    def opinions(self):
        # Extracting opinion words (adjectives and adverbs)
        # JJ adjective, JJR adj comparative, JJS adj superlative
        # RB adverb, RBR comparative, RBS seperlative
        for r in self.revs:
            for s in r.review:               
                for w in TextBlob(str(s)).tags:
                    if (w[1][:2] == 'JJ' or
                       w[1][:2] == 'RB'):
                            # print(w[0])
                            s.op.append(w[0])
    
    def sentiment(self):
        # Detecting sentiment polarity in the sentences
        # containing product features
        for r in self.revs:
            r.sentiment()
                        
    def gen_report(self):
        
        # Generate an product feature report
        # Collect sentiment per feature from each review and
        # store in tuple dict[feature] = (positive_count, negative_count)
        
        for r in self.revs:
            for s in r.review:               
                for feat, polarity in s.pos.items():
                    self.report.setdefault(feat, (0,0))
                    self.report[feat] = (self.report[feat][0]+1,
                                         self.report[feat][1])
                for feat, polarity in s.neg.items():
                    self.report.setdefault(feat, (0,0))
                    self.report[feat][1] = (self.report[feat][0],
                                            self.report[feat][1]-1)
            
        return self.report
    
    def print_report(self):
        if not self.report:
            self.gen_report()
            
        print("\nProduct: ", self.name)
        for feat, score in self.report.items():
            print("\t\nFeature: ", feat)
            print("\t\tPositive: ", self.report[feat][0])
            print("\t\tNegative: ", self.report[feat][1])
    
    def extract_tagged_data(self):
        self.test_report = {}
        for r in self.NltkCorpus:
            # print("\n")
            for f in r.features():
                feat = f[0] # name of feature
                score_sign = f[1][0] #just the plus or minus sign
                self.test_report.setdefault(feat, (0,0))
                if score_sign == '+':
                    self.test_report[feat] = (self.test_report[feat][0]+1,
                                                 self.test_report[feat][1])
                elif score_sign == '-':
                    self.test_report[feat] = (self.test_report[feat][0],
                                                 self.test_report[feat][1]+1)
        #print("\nProduct: ", self.name)
        # for feat, score in test_report.items():
        #    print("\t\nFeature: ", feat)
        #    print("\t\tPositive: ", test_report[feat][0])
        #    print("\t\tNegative: ", test_report[feat][1])
        return self.test_report
    
    def fscore(self, tp, fn, fp):
        recall = 0
        prec = 0
        fscore = 0
        
        if(tp+fn)>0:
            recall = tp/(tp+fn) 
        
        if(tp+fp)>0:
            prec = tp/(tp+fp) 
            
        if(recall+prec)>0:
            fscore = 2*(recall*prec)/(recall+prec)
        
        return recall, prec, fscore
    
    def evaluation(self):
        if not self.test_report:
            self.extract_tagged_data()
            
        if not self.report:
            self.gen_report()
            
        # Evaluation of feature extraction overall
        # Look at features that were picked up by mining and
        # those that were missed:
        cft = len(test_report.keys()) # total features in test data
        cfm = len(my_report.keys()) # total features in my model data
        cfmatch = 0 # count of matching features
        f_tp = []
        f_fp = []
        f_fn = []
        
        for feat in my_report.keys():
            if feat in test_report:
                cfmatch += 1
                f_tp.append(feat)
            else:
                f_fp.append(feat)
                
        for feat in test_report.keys():
            if feat not in my_report:                
                f_fn.append(feat)
                
        recall, prec, fscore = self.fscore(len(f_tp),
                                           len(f_fn),
                                           len(f_fp))
    
        print("Looking at all product features together:")
        print("|\tTP\t|\tFN\t|\tFP\t|\tRecall\t|\tPrec\t|\tF score\t|")
        print("|\t{0}\t|\t{1}\t|\t{2}\t|\t{3:.2f}\t|\t{4:.2f}\t|\t{5:.2f}\t|".format(len(f_tp), 
                                                                                     len(f_fn), 
                                                                                     len(f_fp),
                                                                                    recall,
                                                                                    prec,
                                                                                    fscore))
        
        print("\nTP")
        print('%s' % ', '.join(map(str, f_tp)))

        print("\nFN")
        print('%s' % ', '.join(map(str, f_fn)))

        print("\nFP")
        print('%s' % ', '.join(map(str, f_fp)))

        
        # Calculate recall / precision and F1 score per sentence, review and product:
        self.eval["tp"] = 0 # true positive (labelled feature found in mined features)
        self.eval["fn"] = 0 # false negative (labelled feature not found in mined features)
        self.eval["fp"] = 0 # false positive, mined feature that is not present in labelled
        self.eval["recall"] = 0 
        self.eval["prec"] = 0 
        self.eval["fscore"] = 0 
        for r in self.revs:
            r.eval["tp"] = 0 # true positive (labelled feature found in mined features)
            r.eval["fn"] = 0 # false negative (labelled feature not found in mined features)
            r.eval["fp"] = 0 # false positive, mined feature that is not present in labelled
            r.eval["recall"] = 0 
            r.eval["prec"] = 0 
            r.eval["fscore"] = 0 
            for s in r.review: # iterate through each sentence
                s.eval["tp"] = 0 # true positive (labelled feature found in mined features)
                s.eval["fn"] = 0    # false negative (labelled feature not found in mined features)
                s.eval["fp"] = 0    # false positive, mined feature that is not present in labelled
                s.eval["recall"] = 0 
                s.eval["prec"] = 0 
                s.eval["fscore"] = 0 
                for lf in s.test: # iterate through labelled features
                    if lf[0] in s.ft: # comparing to mined features
                        s.eval["tp"] += 1
                    else:                        
                        s.eval["fn"] += 1
                for mf in s.ft: # iterate through labelled features
                    found = False
                    for lf in s.test:                        
                        if mf == lf[0]:
                            found = True
                            break
                    if not found:
                        s.eval["fp"] += 1                        
                        
                r.eval["tp"] += s.eval["tp"] # true positive (labelled feature found in mined features)
                r.eval["fn"] += s.eval["fn"] # false negative (labelled feature not found in mined features)
                r.eval["fp"] += s.eval["fp"] # false positive, mined feature that is not present in labelled
                
                # Recall / precision / F1 score per sentence:
                s.eval["recall"], s.eval["prec"], s.eval["fscore"] = self.fscore(s.eval["tp"],
                                                                               s.eval["fn"],
                                                                               s.eval["fp"])
                
            self.eval["tp"] += r.eval["tp"] # true positive (labelled feature found in mined features)
            self.eval["fn"] += r.eval["fn"] # false negative (labelled feature not found in mined features)
            self.eval["fp"] += r.eval["fp"] # false positive, mined feature that is not present in labelled
                
            # Recall / precision / F1 score per review:
            r.eval["recall"], r.eval["prec"], r.eval["fscore"] = self.fscore(r.eval["tp"],
                                                                           r.eval["fn"],
                                                                           r.eval["fp"])
            
        # Recall / precision / F1 score per product:
        self.eval["recall"], self.eval["prec"], self.eval["fscore"] = self.fscore(self.eval["tp"],
                                                                                   self.eval["fn"],
                                                                                   self.eval["fp"])
        
        print("\n\nLooking at product features per individual sentence:")
        print("|\tTP\t|\tFN\t|\tFP\t|\tRecall\t|\tPrec\t|\tF score\t|")
        print("|\t{0}\t|\t{1}\t|\t{2}\t|\t{3:.2f}\t|\t{4:.2f}\t|\t{5:.2f}\t|"
              .format(self.eval["tp"],
                    self.eval["fn"],
                    self.eval["fp"],
                    self.eval["recall"],
                    self.eval["prec"],
                    self.eval["fscore"]))
        
        

        # Evaluation of sentiment analysis
        cpr = 0 # total number of positive reviews test data
        cprm = 0 # total number of positive reviews my model
        cnr = 0 # total number of negative reviews test data
        cnrm = 0 # total number of negative reviews my model
                    
                    
                    
        

In [288]:
# Run per product
c = PReviews("Canon_G3.txt")
c.preprocess()
c.features()
c.opinions()
c.sentiment()
#c.print_report()

45
Preprocessed 45 reviews in 0.20 seconds (spelling correction=False, stemming=False)


In [289]:
c.evaluation()

Looking at all product features together:
|	TP	|	FN	|	FP	|	Recall	|	Prec	|	F score	|
|	6	|	99	|	13	|	0.06	|	0.32	|	0.10	|

TP
canon g3, picture quality, digital camera, optical zoom, battery life, raw image

FN
canon powershot g3, use, picture, camera, feature, option, dial, viewfinder, speed, function, auto setting, photo quality, darn diopter adjustment dial, exposure control, metering option, spot metering, 4mp, zoom, focus, size, weight, design, lcd, digital zoom, software, lens cap, menu, button, control, lense, battery, auto mode, canera, print, photo, manual mode, feel, four megapixel, product, night mode, lens cover, zooming lever, color, white balance, price, grain, flash photo, noise, g3, lag time, flash, depth, external flash hot shoe, image, manual function, service, automode, canon, raw format, shape, light auto correction, white offset, low light focus, unresponsiveness, delay, shoot, 4mp camera, body, casing, performance, look, finish, tiff format, lag, import, manual, s

# Extracting opinion words


In [115]:
# print(c.revs[0])

# Extracting opinion words
#for s in c.revs[0].review:
c.opinions()

# print(c.features())

recently
powershot
extremely
satisfied
very
easy
recent
past
elderly
just
green
away
quite
nicely
few
highly
easily
/
x
visable
not
even
best
possible
yet
super
larger
larger
ll
quickly
larger
flash
bottom
well
easy
very
flexible
powerful
external
d
highly
excellent
advanced
many
great
first
digital
very
keen
technical
i
around
digital
worth
single
just
little
powershot
s
powershot
slr
full
manual
together
different
+
bigger
high
great
very
fast
good
type
n
perfect
main
not
backlit
visible
optical
well
only
old
hard
optical
instead
brilliant
too
overall
happy
extensive
different
extremely
helpful
complete
exhaustive
nearly
great
high
awesome
i
here
great
very
pleased
functional
automatic
really
bad
yet
panoramic
together
multiple
seamless
panoramic
absolutely
enormous
i
not
i
very
satisfied
many
useful
many
smaller
digital
easy
steady
slower
visible
wide
most
not
really
much
still
little
suprised
not
minor
highly
serious
digital
only
full
wonderful
amazing
most
own
already
digital
i
m


ve
only
slightly
better
just
digital
somewhat
flimsy
tiny
little
/
general
quite
highest
more
slowly
there
s
gb
remote
digital
s
internal
pretty
quickly
same
also
s
t
highly
enough
re
digital
easier
bottom
re
serious
digital
digital
just
sure
separate
same
ll
immediately
now
even
more
pleased
gray
white
especially
mixed
light
gray
white
also
first
determined
then
lighter
especially
useful
difficult
required
very
easy
probably
also
wide
now
too
very
comfortable
easy
best
digital
about
ultimately
outstanding
superior
almost
external
available
more
fully
many
not
s
d
less
enthusiast
featured
i
arguably
best
non
digital
almost
picture
amazing
s
better
also
better
simply
best
digital
there
ibm
type
compact
up
fine
not
maximum
very
close
gig
twice
nikon
better
else
only
minor
fairly
boxy
instead
slow
even
still
best
digital
available
definetely
great
proven
rather
heavy
great
semi


In [93]:
rules = c.features(min_support=0.005)
                    

Found ['canon', 'g3'] in i recently purchased the canon powershot g3 and am extremely satisfied with the purchase .
Found ['powershot', 'g3'] in i recently purchased the canon powershot g3 and am extremely satisfied with the purchase .
Found ['picture', 'quality'] in a few of my work constituants owned the g2 and highly recommended the canon for picture quality .
Found ['picture', 'quality'] in i ' m easily enlarging pictures to 8 1 / 2 x 11 with no visable loss in picture quality and not even using the best possible setting as yet ( super fine ) .
Found ['flash', 'external'] in bottom line , well made camera , easy to use , very flexible and powerful features to include the ability to use external flash and lense / filters choices .
Found ['picture', 'quality'] in i ' d highly recommend this camera for anyone who is looking for excellent quality pictures and a combination of ease of use and the flexibility to get advanced with many options to adjust if you like .
Found ['digital', 'ca

# Evaluation of results



In [20]:
f1 = association_results

[RelationRecord(items=frozenset({'zoom', '4x'}), support=0.005188067444876783, ordered_statistics=[OrderedStatistic(items_base=frozenset({'4x'}), items_add=frozenset({'zoom'}), confidence=1.0, lift=45.35294117647059), OrderedStatistic(items_base=frozenset({'zoom'}), items_add=frozenset({'4x'}), confidence=0.2352941176470588, lift=45.35294117647059)]), RelationRecord(items=frozenset({'auto', 'mode'}), support=0.005188067444876783, ordered_statistics=[OrderedStatistic(items_base=frozenset({'auto'}), items_add=frozenset({'mode'}), confidence=0.5714285714285714, lift=31.469387755102037), OrderedStatistic(items_base=frozenset({'mode'}), items_add=frozenset({'auto'}), confidence=0.2857142857142857, lift=31.469387755102037)]), RelationRecord(items=frozenset({'life', 'battery'}), support=0.01297016861219196, ordered_statistics=[OrderedStatistic(items_base=frozenset({'battery'}), items_add=frozenset({'life'}), confidence=0.43478260869565216, lift=30.474308300395258), OrderedStatistic(items_base

In [4]:
# Transform reviews into a list of lists of lemmatized noun phrases for the Apriori algorithm to find frequent sets:
l = []
for r in tokenized_reviews:
    for s in r:
        if s:
            for np in s:
                ## Append lemmatized noun phrases
                l.append(list(TextBlob(np).words.lemmatize()))
l


[['canon', 'powershot', 'g3'],
 ['recent', 'trip'],
 ['past', 'week', 'i'],
 ['elderly', 'group'],
 ['work', 'constituants'],
 ['picture', 'quality'],
 ['visable', 'loss'],
 ['picture', 'quality'],
 ['possible', 'setting'],
 ['flash', 'card'],
 ['4mp', 'camera'],
 ['bottom', 'line'],
 ['powerful', 'feature'],
 ['external', 'flash'],
 ['lense', 'filter', 'choice'],
 ['excellent', 'quality', 'picture'],
 ['great', 'job', 'canon'],
 ['digital', 'camera'],
 ['software', 'engineer'],
 ['technical', 'detail'],
 ['digital', 'camera'],
 ['cent', 'i'],
 ['powershot', 'g3'],
 ['s', 'powershot', 'series'],
 ['full', 'manual', 'control'],
 ['different', 'kind'],
 ['flash'],
 ['store', 'high', 'quality', 'image'],
 ['kingston', '512mb', 'cf'],
 ['good', 'choice'],
 ['cf', 'type', 'ii'],
 ['main', 'dial'],
 ['lens'],
 ['optical', 'viewfinder'],
 ['hard', 'optical', 'viewfinder', 'fan'],
 ['overall', 'i'],
 ['extensive', 'research'],
 ['megapixel', 'camera'],
 ['online', 'camera', 'review', 'site'],


In [21]:
# Applying Apriori algorithm to find the frequent sets:
# https://stackabuse.com/association-rule-mining-via-apriori-algorithm-in-python/

from apyori import apriori
association_rules = apriori(l, min_support=0.005, min_confidence=0.2, min_lift=3, min_length=3)
association_results = list(association_rules)
print("Done")
counter = 0
for a in association_results:
    print(a.items)
    counter +=1
print("Associations ", counter)
#print(association_rules[0])

Done
frozenset({'zoom', '4x'})
frozenset({'mode', 'auto'})
frozenset({'battery', 'life'})
frozenset({'camera', 'digital'})
frozenset({'film', 'camera'})
frozenset({'megapixel', 'camera'})
frozenset({'canon', 'g3'})
frozenset({'lens', 'cap'})
frozenset({'external', 'flash'})
frozenset({'flash', 'metz'})
frozenset({'powershot', 'g3'})
frozenset({'i', 'use'})
frozenset({'raw', 'image'})
frozenset({'screen', 'lcd'})
frozenset({'mode', 'manual'})
frozenset({'review', 'online'})
frozenset({'optical', 'zoom'})
frozenset({'quality', 'picture'})
frozenset({'shutter', 'speed'})
Associations  19


In [48]:
d = ""
for r in camera_reviews:
    for s in r.sents():
        d += ' '.join(s)
b = TextBlob(d).np_counts
{k: v for k, v in sorted(b.items(), key=lambda item: item[1])}

{'canon powershot g3': 1,
 'purchase .the camera': 1,
 'recent trip': 1,
 'past week i': 1,
 'elderly group .after i': 1,
 'way .they': 1,
 'work constituants': 1,
 'picture quality .i': 1,
 'visable loss': 1,
 'possible setting': 1,
 '4mp cameras .bottom line': 1,
 'powerful features': 1,
 'lense / filters choices .i': 1,
 'excellent quality pictures': 1,
 '.great job canon': 1,
 'yep .this': 1,
 'software engineer': 1,
 'technical details': 1,
 'cent i': 1,
 's powershot series': 1,
 'full manual control': 1,
 'brilliance .whether': 1,
 'different kind': 1,
 '+ flashes': 1,
 'etc .as': 1,
 'store high quality images': 1,
 'kingston 512mb cf': 1,
 '.a good choice': 1,
 'cf type ii': 1,
 'images .some things': 1,
 'main dial': 1,
 '* lens': 1,
 'optical viewfinder': 1,
 'hard optical viewfinder fans': 1,
 '.overall i': 1,
 'toy .i': 1,
 'megapixel cameras .a couple': 1,
 'online camera review sites': 1,
 'complete exhaustive reviews': 1,
 'great number': 1,
 'high resolution sample ima

In [28]:
import spacy # for noun phrase extraction
from textblob import TextBlob # for noun phrase extraction

nlp = spacy.load("en_core_web_sm")
doc = nlp(reviews[0][1])

for np in doc.noun_chunks:
    print(np)
    
tb = TextBlob(reviews[0][1])
tb.noun_phrases

the camera
fact
a recent trip
i
a picture
a vacationing elderly group


WordList(['recent trip', 'past week i', 'elderly group'])

# Manually parsing the corpora


In [19]:
def parse_tagged_reviews(path):
    with open(path, 'r') as f:
        reviews = []
        
        title = ""
        text = []
        for line in f.readlines():
            if line.startswith("*"): 
                # skip comment                
                continue
            elif line.startswith("[t]"):                 
                # title of new review
                if text: # but title can be empty                    
                    reviews.append(text)                
                text = [] # reset last review
                features = "" # reset last feature
                title = line[3:]
                # print("Title:", title)                
            elif line.startswith("##"): # sentence
                text.append(line[2:])
            elif not line.startswith("##") and "##" in line: #feature
                s = line.split("##")
                features = s[0]
                text.append(s[1])
        # append the last review
        reviews.append(text)
    return reviews


In [24]:
doc="Data/Customer_review_data/Canon G3.txt"
reviews = parse_tagged_reviews(doc)
reviews

[['i recently purchased the canon powershot g3 and am extremely satisfied with the purchase . \n',
  'the camera is very easy to use , in fact on a recent trip this past week i was asked to take a picture of a vacationing elderly group . \n',
  'after i took their picture with their camera , they offered to take a picture of us . \n',
  'i just told them , press halfway , wait for the box to turn green and press the rest of the way . \n',
  'they fired away and the picture turned out quite nicely . ( as all of my pictures have thusfar ) . \n',
  'a few of my work constituants owned the g2 and highly recommended the canon for picture quality . \n',
  "i 'm easily enlarging pictures to 8 1/2 x 11 with no visable loss in picture quality and not even using the best possible setting as yet ( super fine ) . \n",
  "ensure you get a larger flash , 128 or 256 , some are selling with the larger flash , 32mb will do in a pinch but you 'll quickly want a larger flash card as with any of the 4mp c